author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 00:47:55 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-19 00:47:55 +0000
commit     26a029d407be480d791972afb5975cf62c9360a6
tree       f435a8308119effd964b339f76abb83a57c29483 /security/nss/lib/freebl/mpi
parent     Initial commit.
Adding upstream version 124.0.1. (tag: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'security/nss/lib/freebl/mpi')
-rw-r--r-- security/nss/lib/freebl/mpi/README | 646
-rw-r--r-- security/nss/lib/freebl/mpi/doc/LICENSE | 11
-rw-r--r-- security/nss/lib/freebl/mpi/doc/LICENSE-MPL | 3
-rw-r--r-- security/nss/lib/freebl/mpi/doc/basecvt.pod | 65
-rwxr-xr-x security/nss/lib/freebl/mpi/doc/build | 30
-rw-r--r-- security/nss/lib/freebl/mpi/doc/div.txt | 64
-rw-r--r-- security/nss/lib/freebl/mpi/doc/expt.txt | 94
-rw-r--r-- security/nss/lib/freebl/mpi/doc/gcd.pod | 28
-rw-r--r-- security/nss/lib/freebl/mpi/doc/invmod.pod | 34
-rw-r--r-- security/nss/lib/freebl/mpi/doc/isprime.pod | 63
-rw-r--r-- security/nss/lib/freebl/mpi/doc/lap.pod | 36
-rw-r--r-- security/nss/lib/freebl/mpi/doc/mpi-test.pod | 51
-rw-r--r-- security/nss/lib/freebl/mpi/doc/mul.txt | 77
-rw-r--r-- security/nss/lib/freebl/mpi/doc/pi.txt | 53
-rw-r--r-- security/nss/lib/freebl/mpi/doc/prime.txt | 6542
-rw-r--r-- security/nss/lib/freebl/mpi/doc/prng.pod | 38
-rw-r--r-- security/nss/lib/freebl/mpi/doc/redux.txt | 86
-rw-r--r-- security/nss/lib/freebl/mpi/doc/sqrt.txt | 50
-rw-r--r-- security/nss/lib/freebl/mpi/doc/square.txt | 72
-rw-r--r-- security/nss/lib/freebl/mpi/doc/timing.txt | 213
-rw-r--r-- security/nss/lib/freebl/mpi/hpma512.s | 615
-rw-r--r-- security/nss/lib/freebl/mpi/hppa20.s | 904
-rw-r--r-- security/nss/lib/freebl/mpi/logtab.h | 28
-rw-r--r-- security/nss/lib/freebl/mpi/montmulf.c | 286
-rw-r--r-- security/nss/lib/freebl/mpi/montmulf.h | 65
-rw-r--r-- security/nss/lib/freebl/mpi/montmulf.il | 108
-rw-r--r-- security/nss/lib/freebl/mpi/montmulf.s | 1938
-rw-r--r-- security/nss/lib/freebl/mpi/montmulfv8.il | 108
-rw-r--r-- security/nss/lib/freebl/mpi/montmulfv8.s | 1818
-rw-r--r-- security/nss/lib/freebl/mpi/montmulfv9.il | 93
-rw-r--r-- security/nss/lib/freebl/mpi/montmulfv9.s | 2346
-rw-r--r-- security/nss/lib/freebl/mpi/mp_comba.c | 3235
-rw-r--r-- security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm | 13066
-rw-r--r-- security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s | 16097
-rw-r--r-- security/nss/lib/freebl/mpi/mp_gf2m-priv.h | 73
-rw-r--r-- security/nss/lib/freebl/mpi/mp_gf2m.c | 677
-rw-r--r-- security/nss/lib/freebl/mpi/mp_gf2m.h | 28
-rw-r--r-- security/nss/lib/freebl/mpi/mpcpucache.c | 788
-rw-r--r-- security/nss/lib/freebl/mpi/mpcpucache_amd64.s | 861
-rw-r--r-- security/nss/lib/freebl/mpi/mpcpucache_x86.s | 902
-rw-r--r-- security/nss/lib/freebl/mpi/mpi-config.h | 56
-rw-r--r-- security/nss/lib/freebl/mpi/mpi-priv.h | 246
-rw-r--r-- security/nss/lib/freebl/mpi/mpi.c | 5241
-rw-r--r-- security/nss/lib/freebl/mpi/mpi.h | 363
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_amd64.c | 32
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_amd64_common.S | 409
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_amd64_masm.asm | 388
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_amd64_sun.s | 385
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_arm.c | 175
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_hp.c | 81
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_i86pc.s | 313
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_mips.s | 472
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_sparc.c | 226
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_sse2.s | 294
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_x86.s | 541
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_x86_asm.c | 531
-rw-r--r-- security/nss/lib/freebl/mpi/mpi_x86_os2.s | 538
-rw-r--r-- security/nss/lib/freebl/mpi/mplogic.c | 460
-rw-r--r-- security/nss/lib/freebl/mpi/mplogic.h | 55
-rw-r--r-- security/nss/lib/freebl/mpi/mpmontg.c | 1160
-rw-r--r-- security/nss/lib/freebl/mpi/mpprime.c | 610
-rw-r--r-- security/nss/lib/freebl/mpi/mpprime.h | 48
-rw-r--r-- security/nss/lib/freebl/mpi/mpv_sparc.c | 221
-rw-r--r-- security/nss/lib/freebl/mpi/mpv_sparcv8.s | 1607
-rw-r--r-- security/nss/lib/freebl/mpi/mpv_sparcv9.s | 1645
-rw-r--r-- security/nss/lib/freebl/mpi/mpvalpha.c | 183
-rw-r--r-- security/nss/lib/freebl/mpi/mulsqr.c | 84
-rw-r--r-- security/nss/lib/freebl/mpi/primes.c | 841
-rw-r--r-- security/nss/lib/freebl/mpi/vis_32.il | 1291
-rw-r--r-- security/nss/lib/freebl/mpi/vis_64.il | 997
-rw-r--r-- security/nss/lib/freebl/mpi/vis_proto.h | 234
71 files changed, 72019 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README
new file mode 100644
index 0000000000..a49aa9d8d7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/README
@@ -0,0 +1,646 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+About the MPI Library
+---------------------
+
+The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision
+signed integer arithmetic package. The implementation is not the most
+efficient possible, but the code is small and should be fairly easily
+portable to just about any machine that supports an ANSI C compiler,
+as long as it is capable of at least 16-bit arithmetic (but also see
+below for more on this).
+
+This library was written with an eye to cryptographic applications;
+thus, some care is taken to make sure that temporary values are not
+left lying around in memory when they are no longer in use. This adds
+some overhead for zeroing buffers before they are released back into
+the free pool; however, it gives you the assurance that there is only
+one copy of your important values residing in your process's address
+space at a time. Obviously, it is difficult to guarantee anything in
+a pre-emptive multitasking environment, but this at least helps you
+keep a lid on the more obvious ways your data can get spread around in
+memory.
+
+
+Using the Library
+-----------------
+
+To use the MPI library in your program, you must include the header:
+
+#include "mpi.h"
+
+This header provides all the type and function declarations you'll
+need to use the library. Almost all the names defined by the library
+begin with the prefix 'mp_', so it should be easy to keep them from
+clashing with your program's namespace (he says, glibly, knowing full
+well there are always pathological cases).
+
+There are a few things you may want to configure about the library.
+By default, the MPI library uses an unsigned short for its digit type,
+and an unsigned int for its word type. The word type must be big
+enough to contain at least two digits, for the primitive arithmetic to
+work out. On my machine, a short is 2 bytes and an int is 4 bytes --
+but if you have 64-bit ints, you might want to use a 4-byte digit and
+an 8-byte word. I have tested the library using 1-byte digits and
+2-byte words, as well. Whatever you choose to do, the things you need
+to change are:
+
+(1) The type definitions for mp_digit and mp_word.
+
+(2) The macro DIGIT_FMT which tells mp_print() how to display a
+ single digit. This is just a printf() format string, so you
+ can adjust it appropriately.
+
+(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the
+ largest value expressible in an mp_digit and an mp_word,
+ respectively.
+
+Both the mp_digit and mp_word should be UNSIGNED integer types. The
+code relies on having the full positive precision of the type used for
+digits and words.
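+
+For example, on a typical 64-bit platform you might configure 4-byte
+digits and 8-byte words. The following is only a sketch -- it assumes
+that 'unsigned int' is 4 bytes and 'unsigned long' is 8 bytes on your
+compiler, which you should verify before adopting it:
+
+    #include <limits.h>
+
+    typedef unsigned int mp_digit;   /* 4-byte digit                */
+    typedef unsigned long mp_word;   /* 8-byte word, >= 2 digits    */
+
+    #define DIGIT_FMT "%08X"         /* how mp_print() shows digits */
+    #define DIGIT_MAX UINT_MAX       /* largest mp_digit value      */
+    #define MP_WORD_MAX ULONG_MAX    /* largest mp_word value       */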
+
+The remaining type definitions should be left alone, for the most
+part. The code in the library does not make any significant
+assumptions about the sizes of things, but there is little if any
+reason to change the other parameters, so I would recommend you leave
+them as you found them.
+
+
+Conventions
+-----------
+
+Most functions in the library return a value of type mp_err. This
+permits the library to communicate success or various kinds of failure
+to the calling program. The return values currently defined are:
+
+ MP_OKAY - okay, operation succeeded, all's well
+ MP_YES - okay, the answer is yes (same as MP_OKAY)
+ MP_NO - okay, but answer is no (not MP_OKAY)
+ MP_MEM - operation ran out of memory
+ MP_RANGE - input parameter was out of range
+ MP_BADARG - an invalid input parameter was provided
+ MP_UNDEF - no output value is defined for this input
+
+The only function which currently uses MP_UNDEF is mp_invmod().
+Division by zero is undefined, but the division functions will return
+MP_RANGE for a zero divisor. MP_BADARG usually means you passed a
+bogus mp_int structure to the function. MP_YES and MP_NO are not used
+by the library itself; they're defined so you can use them in your own
+extensions.
+
+If you need a readable interpretation of these error codes in your
+program, you may also use the mp_strerror() function. This function
+takes an mp_err as input, and returns a pointer to a human-readable
+string describing the meaning of the error. These strings are stored
+as constants within the library, so the caller should not attempt to
+modify or free the memory associated with these strings.
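+
+For example, a typical call might be checked as follows (a sketch
+only; it assumes a, b, and c are initialized mp_int structures):
+
+    mp_err res;
+
+    if ((res = mp_mul(&a, &b, &c)) != MP_OKAY) {
+        fprintf(stderr, "mp_mul: %s\n", mp_strerror(res));
+        /* release your mp_int values and recover here */
+    }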
+
+The library represents values in signed-magnitude format. Values
+strictly less than zero are negative, all others are considered
+positive (zero is positive by fiat). You can access the 'sign' member
+of the mp_int structure directly, but better is to use the mp_cmp_z()
+function, to find out which side of zero the value lies on.
+
+Most arithmetic functions have a single-digit variant, as well as the
+full arbitrary-precision version. An mp_digit is an unsigned value
+between 0 and DIGIT_MAX inclusive. The radix is available as RADIX.
+The number of bits in a given digit is given as DIGIT_BIT.
+
+Generally, input parameters are given before output parameters.
+Unless otherwise specified, any input parameter can be re-used as an
+output parameter, without confusing anything.
+
+The basic numeric type defined by the library is an mp_int. Virtually
+all the functions in the library take a pointer to an mp_int as one of
+their parameters. An explanation of how to create and use these
+structures follows. And so, without further ado...
+
+
+Initialization and Cleanup
+--------------------------
+
+The basic numeric type defined by the library is an 'mp_int'.
+However, it is not sufficient to simply declare a variable of type
+mp_int in your program. These variables also need to be initialized
+before they can be used, to allocate the internal storage they require
+for computation.
+
+This is done using one of the following functions:
+
+ mp_init(mp_int *mp);
+ mp_init_copy(mp_int *mp, mp_int *from);
+ mp_init_size(mp_int *mp, mp_size p);
+
+Each of these requires a pointer to a structure of type mp_int. The
+basic mp_init() simply initializes the mp_int to a default size, and
+sets its value to zero. If you would like to initialize a copy of an
+existing mp_int, use mp_init_copy(), where the 'from' parameter is the
+mp_int you'd like to make a copy of. The third function,
+mp_init_size(), permits you to specify how many digits of precision
+should be preallocated for your mp_int. This can help the library
+avoid unnecessary re-allocations later on.
+
+The default precision used by mp_init() can be retrieved using:
+
+ precision = mp_get_prec();
+
+This returns the number of digits that will be allocated. You can
+change this value by using:
+
+ mp_set_prec(unsigned int prec);
+
+Any positive value is acceptable -- if you pass zero, the default
+precision will be re-set to the compiled-in library default (this is
+specified in the header file 'mpi-config.h', and typically defaults to
+8 or 16).
+
+Just as you must allocate an mp_int before you can use it, you must
+clean up the structure when you are done with it. This is performed
+using the mp_clear() function. Remember that any mp_int that you
+create as a local variable in a function must be mp_clear()'d before
+that function exits, or else the memory allocated to that mp_int will
+be orphaned and unrecoverable.
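+
+A minimal lifecycle, then, looks like this (error recovery elided):
+
+    mp_int a;
+
+    if (mp_init(&a) != MP_OKAY)
+        return; /* out of memory */
+
+    /* ... compute with a ... */
+
+    mp_clear(&a); /* zeroes and releases a's storage */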
+
+To set an mp_int to a given value, the following functions are given:
+
+ mp_set(mp_int *mp, mp_digit d);
+ mp_set_int(mp_int *mp, long z);
+ mp_set_ulong(mp_int *mp, unsigned long z);
+
+The mp_set() function sets the mp_int to a single digit value,
+mp_set_int() sets it to a signed long integer value, and
+mp_set_ulong() sets it to an unsigned long integer value.
+
+To set an mp_int to zero, use:
+
+ mp_zero(mp_int *mp);
+
+
+Copying and Moving
+------------------
+
+If you have two initialized mp_int's, and you want to copy the value
+of one into the other, use:
+
+ mp_copy(from, to)
+
+This takes care of clearing the old value of 'to', and copies the new
+value into it. If 'to' is not yet initialized, use mp_init_copy()
+instead (see above).
+
+Note: The library tries, whenever possible, to avoid allocating
+---- new memory. Thus, mp_copy() tries first to satisfy the needs
+ of the copy by re-using the memory already allocated to 'to'.
+ Only if this proves insufficient will mp_copy() actually
+ allocate new memory.
+
+ For this reason, if you know a priori that 'to' has enough
+ available space to hold 'from', you don't need to check the
+ return value of mp_copy() for memory failure. The USED()
+ macro tells you how many digits are used by an mp_int, and
+ the ALLOC() macro tells you how many are allocated.
+
+If you have two initialized mp_int's, and you want to exchange their
+values, use:
+
+ mp_exch(a, b)
+
+This is better than using mp_copy() with a temporary, since it will
+not (ever) touch the memory allocator -- it just swaps the exact
+contents of the two structures. The mp_exch() function cannot fail;
+if you pass it an invalid structure, it just ignores it, and does
+nothing.
+
+
+Basic Arithmetic
+----------------
+
+Once you have initialized your integers, you can operate on them. The
+basic arithmetic functions on full mp_int values are:
+
+mp_add(a, b, c) - computes c = a + b
+mp_sub(a, b, c) - computes c = a - b
+mp_mul(a, b, c) - computes c = a * b
+mp_sqr(a, b) - computes b = a * a
+mp_div(a, b, q, r) - computes q, r such that a = bq + r
+mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d
+mp_expt(a, b, c) - computes c = a ** b
+mp_2expt(a, k) - computes a = 2^k
+
+The mp_div_2d() function efficiently computes division by powers of
+two. Either the q or r parameter may be NULL, in which case that
+portion of the computation will be discarded.
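+
+For example, to divide by 8 = 2^3 (a sketch; a, q, and r are assumed
+to be initialized mp_int structures):
+
+    mp_div_2d(&a, 3, &q, &r);   /* q = a / 8, r = a % 8      */
+    mp_div_2d(&a, 3, &q, NULL); /* quotient only, discard r  */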
+
+The algorithms used for some of the computations here are described in
+the following files which are included with this distribution:
+
+mul.txt Describes the multiplication algorithm
+div.txt Describes the division algorithm
+expt.txt Describes the exponentiation algorithm
+sqrt.txt Describes the square-root algorithm
+square.txt Describes the squaring algorithm
+
+There are single-digit versions of most of these routines, as well.
+In the following prototypes, 'd' is a single mp_digit:
+
+mp_add_d(a, d, c) - computes c = a + d
+mp_sub_d(a, d, c) - computes c = a - d
+mp_mul_d(a, d, c) - computes c = a * d
+mp_mul_2(a, c) - computes c = a * 2
+mp_div_d(a, d, q, r) - computes q, r such that a = dq + r
+mp_div_2(a, c) - computes c = a / 2
+mp_expt_d(a, d, c) - computes c = a ** d
+
+The mp_mul_2() and mp_div_2() functions take advantage of the internal
+representation of an mp_int to do multiplication by two more quickly
+than mp_mul_d() would. Other basic functions of an arithmetic variety
+include:
+
+mp_zero(a) - assign 0 to a
+mp_neg(a, c) - negate a: c = -a
+mp_abs(a, c) - absolute value: c = |a|
+
+
+Comparisons
+-----------
+
+Several comparison functions are provided. Each of these, unless
+otherwise specified, returns zero if the comparands are equal, < 0 if
+the first is less than the second, and > 0 if the first is greater
+than the second:
+
+mp_cmp_z(a) - compare a <=> 0
+mp_cmp_d(a, d) - compare a <=> d, d is a single digit
+mp_cmp(a, b) - compare a <=> b
+mp_cmp_mag(a, b) - compare |a| <=> |b|
+mp_isodd(a) - return nonzero if odd, zero otherwise
+mp_iseven(a) - return nonzero if even, zero otherwise
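+
+In use, the three-way comparisons read like this (a sketch):
+
+    if (mp_cmp(&a, &b) < 0)
+        printf("a is less than b\n");
+
+    if (mp_cmp_z(&a) == 0)
+        printf("a is zero\n");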
+
+
+Modular Arithmetic
+------------------
+
+Modular variations of the basic arithmetic functions are also
+supported. These are available if the MP_MODARITH parameter in
+mpi-config.h is turned on (it is by default). The modular arithmetic
+functions are:
+
+mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m
+mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below)
+mp_addmod(a, b, m, c) - compute c = (a + b) mod m
+mp_submod(a, b, m, c) - compute c = (a - b) mod m
+mp_mulmod(a, b, m, c) - compute c = (a * b) mod m
+mp_sqrmod(a, m, c) - compute c = (a * a) mod m
+mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m
+mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
+
+The mp_sqr() function squares its input argument. A call to mp_sqr(a,
+c) is identical in meaning to mp_mul(a, a, c); however, if the
+MP_SQUARE variable is set true in mpi-config.h (see below), then it
+will be implemented with a different algorithm, that is supposed to
+take advantage of the redundant computation that takes place during
+squaring. Unfortunately, some compilers produce worse code for this
+routine, so you can change the behaviour at will. There is a
+utility program "mulsqr.c" that lets you test which does better on
+your system.
+
+The mp_sqrmod() function is analogous to the mp_sqr() function; it
+uses the mp_sqr() function rather than mp_mul(), and then performs the
+modular reduction. This probably won't help much unless you are doing
+a lot of them.
+
+See the file 'square.txt' for a synopsis of the algorithm used.
+
+Note: The mp_mod_d() function computes a modular reduction around
+---- a single digit d. The result is a single digit c.
+
+Because an inverse is defined for a (mod m) if and only if (a, m) = 1
+(that is, if a and m are relatively prime), mp_invmod() may not be
+able to compute an inverse for the arguments. In this case, it
+returns the value MP_UNDEF, and does not modify c. If an inverse is
+defined, however, it returns MP_OKAY, and sets c to the value of the
+inverse (mod m).
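+
+In practice, you dispatch on the return value, as in this sketch:
+
+    switch (mp_invmod(&a, &m, &c)) {
+        case MP_OKAY:  /* c is now a^-1 (mod m)             */
+            break;
+        case MP_UNDEF: /* no inverse: (a, m) != 1           */
+            break;
+        default:       /* MP_MEM, MP_RANGE, MP_BADARG, ...  */
+            break;
+    }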
+
+See the file 'redux.txt' for a description of the modular reduction
+algorithm used by mp_exptmod().
+
+
+Greatest Common Divisor
+-----------------------
+
+The greatest common divisor of two values can be found using one of
+the following functions:
+
+mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm
+mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b)
+mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b)
+
+Also provided is a function to compute modular inverses, if they
+exist:
+
+mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists
+
+The function mp_xgcd() computes the greatest common divisor, and also
+returns values of x and y satisfying Bezout's identity. This is used
+by mp_invmod() to find modular inverses. However, if you do not need
+these values, you will find that mp_gcd() is MUCH more efficient,
+since it doesn't need all the intermediate values that mp_xgcd()
+requires in order to compute x and y.
+
+The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
+algorithm due to Josef Stein.
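+
+For example, with initialized mp_int structures (a sketch; error
+checking omitted):
+
+    mp_gcd(&a, &b, &g);          /* g = (a, b), fast       */
+    mp_xgcd(&a, &b, &g, &x, &y); /* also ax + by = g       */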
+
+
+Input & Output Functions
+------------------------
+
+The following basic I/O routines are provided. These are present at
+all times:
+
+mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int
+mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int
+mp_radix_size(mp, r) - return length of buffer needed by mp_toradix()
+mp_raw_size(mp) - return length of buffer needed by mp_toraw()
+mp_toradix(mp, str, r) - convert an mp_int to a string of radix r
+ digits
+mp_toraw(mp, str) - convert an mp_int to a string of bytes
+mp_tovalue(ch, r) - convert ch to its value when taken as
+ a radix r digit, or -1 if invalid
+mp_strerror(err) - get a string describing mp_err value 'err'
+
+If you compile the MPI library with MP_IOFUNC defined, you will also
+have access to the following additional I/O function:
+
+mp_print(mp, ofp) - print an mp_int as text to output stream ofp
+
+Note that mp_radix_size() returns a size in bytes guaranteed to be AT
+LEAST big enough for the digits output by mp_toradix(). Because it
+uses an approximation technique to figure out how many digits will be
+needed, it may return a figure which is larger than necessary. Thus,
+the caller should not rely on the value to determine how many bytes
+will actually be written by mp_toradix(). The string mp_toradix()
+creates will be NUL terminated, so the standard C library function
+strlen() should be able to ascertain this for you, if you need it.
+
+The mp_read_radix() and mp_toradix() functions support bases from 2 to
+64 inclusive. If you require more general radix conversion facilities
+than this, you will need to write them yourself (that's why mp_div_d()
+is provided, after all).
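+
+Putting these together, a conversion to hexadecimal might look like
+the following sketch (it assumes <stdlib.h> for malloc() and free()):
+
+    int len = mp_radix_size(&a, 16);
+    char *buf;
+
+    if (len > 0 && (buf = malloc(len)) != NULL) {
+        mp_toradix(&a, buf, 16); /* NUL-terminated string  */
+        printf("a = %s\n", buf);
+        free(buf);
+    }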
+
+Note: mp_read_radix() will accept as digits either capital or
+---- lower-case letters. However, the current implementation of
+ mp_toradix() only outputs upper-case letters, when writing
+ bases between 10 and 36. The underlying code supports using
+ lower-case letters, but the interface stub does not have a
+ selector for it. You can add one yourself if you think it
+ is worthwhile -- I do not. Bases from 36 to 64 use lower-
+ case letters as distinct from upper-case. Bases 63 and
+ 64 use the characters '+' and '/' as digits.
+
+ Note also that compiling with MP_IOFUNC defined will cause
+ inclusion of <stdio.h>, so if you are trying to write code
+ which does not depend on the standard C library, you will
+ probably want to avoid this option. This is needed because
+ the mp_print() function takes a standard library FILE * as
+ one of its parameters, and uses the fprintf() function.
+
+The mp_toraw() function converts the integer to a sequence of bytes,
+in big-endian ordering (most-significant byte first). Assuming your
+bytes are 8 bits wide, this corresponds to base 256. The sign is
+encoded as a single leading byte, whose value is 0 for zero or
+positive values, or 1 for negative values. The mp_read_raw() function
+reverses this process -- it takes a buffer of bytes, interprets the
+first as a sign indicator (0 = zero/positive, nonzero = negative), and
+the rest as a sequence of 1-byte digits in big-endian ordering.
+
+The mp_raw_size() function returns the exact number of bytes required
+to store the given integer in "raw" format (as described in the
+previous paragraph). Zero is returned in case of error; a valid
+integer will require at least three bytes of storage.
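+
+A round trip through the raw format might be sketched as follows
+(error handling elided; a and b are initialized mp_int structures):
+
+    int len = mp_raw_size(&a);
+
+    if (len > 0) {
+        char *buf = malloc(len);
+
+        if (buf != NULL) {
+            mp_toraw(&a, buf);         /* buf[0] = sign byte     */
+            mp_read_raw(&b, buf, len); /* reconstructs: b == a   */
+            free(buf);
+        }
+    }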
+
+In previous versions of the MPI library, an "external representation
+format" was supported. This was removed, however, because I found I
+was never using it, it was not as portable as I would have liked, and
+I decided it was a waste of space.
+
+
+Other Functions
+---------------
+
+The files 'mpprime.h' and 'mpprime.c' define some routines which are
+useful for divisibility testing and probabilistic primality testing.
+The routines defined are:
+
+mpp_divis(a, b) - is a divisible by b?
+mpp_divis_d(a, d) - is a divisible by digit d?
+mpp_random(a) - set a to random value at current precision
+mpp_random_size(a, prec) - set a to random value at given precision
+
+Note: The mpp_random() and mpp_random_size() functions use the C
+---- library's rand() function to generate random values. It is
+ up to the caller to seed this generator before it is called.
+ These functions are not suitable for generating quantities
+ requiring cryptographic-quality randomness; they are intended
+ primarily for use in primality testing.
+
+ Note too that the MPI library does not call srand(), so your
+ application should do this, if you ever want the sequence
+ to change.
+
+mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits
+ in v? If so, let w be the index of
+ that digit
+
+mpp_divis_primes(a, np) - is a divisible by any of the first np
+ primes? If so, set np to the prime
+ which divided a.
+
+mpp_fermat(a, w) - test if w^a = w (mod a). If so,
+ returns MP_YES, otherwise MP_NO.
+
+mpp_pprime(a, nt) - perform nt iterations of the Rabin-
+ Miller probabilistic primality test
+ on a. Returns MP_YES if all tests
+ passed, or MP_NO if any test fails.
+
+The mpp_fermat() function works based on Fermat's little theorem, a
+consequence of which is that if p is a prime, and (w, p) = 1, then:
+
+ w^p = w (mod p)
+
+Put another way, if w^p != w (mod p), then p is not prime. The test
+is expensive to compute, but it helps to quickly eliminate an enormous
+class of composite numbers prior to Rabin-Miller testing.
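+
+A simple candidate test, then, screens with the small primes before
+running Rabin-Miller (a sketch; see 'mpprime.h' for the exact
+prototypes -- note that np is passed by reference, since it is both
+an input and an output):
+
+    mp_digit np = 170; /* screen with the first 170 primes */
+
+    if (mpp_divis_primes(&a, &np) == MP_YES)
+        printf("composite: divisible by %u\n", (unsigned int)np);
+    else if (mpp_pprime(&a, 15) == MP_YES)
+        printf("probably prime\n");
+    else
+        printf("composite\n");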
+
+Building the Library
+--------------------
+
+The MPI library is designed to be as self-contained as possible. You
+should be able to compile it with your favourite ANSI C compiler, and
+link it into your program directly. If you are on a Unix system using
+the GNU C compiler (gcc), the following should work:
+
+% gcc -ansi -pedantic -Wall -O2 -c mpi.c
+
+The file 'mpi-config.h' defines several configurable parameters for
+the library, which you can adjust to suit your application. At the
+time of this writing, the available options are:
+
+MP_IOFUNC - Define true to include the mp_print() function,
+ which is moderately useful for debugging. This
+ implicitly includes <stdio.h>.
+
+MP_MODARITH - Define true to include the modular arithmetic
+ functions. If you don't need modular arithmetic
+ in your application, you can set this to zero to
+ leave out all the modular routines.
+
+MP_LOGTAB - If true, the file "logtab.h" is included, which
+ is basically a static table of base 2 logarithms.
+ These are used to compute how big the buffers for
+ radix conversion need to be. If you set this false,
+ the library includes <math.h> and uses log(). This
+ typically forces you to link against math libraries.
+
+
+MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument
+ checking macro, ARGCHK(), gets expanded. If this
+ is set to zero, ARGCHK() expands to nothing; no
+ argument checks are performed. If this is 1, the
+ ARGCHK() macro expands to code that returns MP_BADARG
+ or similar at runtime. If it is 2, ARGCHK() expands
+ to an assert() call that aborts the program on a
+ bad input.
+
+MP_DEBUG - Turns on debugging output. This is probably not at
+ all useful unless you are debugging the library. It
+ tends to spit out a LOT of output.
+
+MP_DEFPREC - The default precision of a newly-created mp_int, in
+ digits. The precision can be changed at runtime by
+ the mp_set_prec() function, but this is its initial
+ value.
+
+MP_SQUARE - If this is set to a nonzero value, the mp_sqr()
+ function will use an alternate algorithm that takes
+ advantage of the redundant inner product computation
+ when both multiplicands are identical. Unfortunately,
+ with some compilers this is actually SLOWER than just
+ calling mp_mul() with the same argument twice. So
+ if you set MP_SQUARE to zero, mp_sqr() will be expan-
+ ded into a call to mp_mul(). This applies to all
+ the uses of mp_sqr(), including mp_sqrmod() and the
+ internal calls to s_mp_sqr() inside mpi.c
+
+ The program 'mulsqr' (mulsqr.c) can be used to test
+ which works best for your configuration. Set up the
+ CC and CFLAGS variables in the Makefile, then type:
+
+ make mulsqr
+
+ Invoke it with arguments similar to the following:
+
+ mulsqr 25000 1024
+
+ That is, 25000 products computed on 1024-bit values.
+ The output will compare the two timings, and recommend
+ a setting for MP_SQUARE. It is off by default.
+
+If you would like to use the mp_print() function (see above), be sure
+to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the
+'tests' subdirectory expect this to be defined (although the test
+driver 'mpi-test' doesn't need it).
+
+The Makefile which comes with the library should take care of building
+the library for you, if you have set the CC and CFLAGS variables at
+the top of the file appropriately. By default, they are set up to
+use the GNU C compiler:
+
+CC=gcc
+CFLAGS=-ansi -pedantic -Wall -O2
+
+If all goes well, the library should compile without warnings using
+this combination. You should, of course, make whatever adjustments
+you find necessary.
+
+The MPI library distribution comes with several additional programs
+which are intended to demonstrate the use of the library, and provide
+a framework for testing it. There are a handful of test driver
+programs, in the files named 'mptest-X.c', where X is a digit. Also,
+there are some simple command-line utilities (in the 'utils'
+directory) for manipulating large numbers. These include:
+
+basecvt.c A radix-conversion program, supporting bases from
+ 2 to 64 inclusive.
+
+bbsrand.c A BBS (quadratic residue) pseudo-random number
+ generator. The file 'bbsrand.c' is just the driver
+ for the program; the real code lives in the files
+ 'bbs_rand.h' and 'bbs_rand.c'
+
+dec2hex.c Converts decimal to hexadecimal
+
+gcd.c Computes the greatest common divisor of two values.
+ If invoked as 'xgcd', also computes constants x and
+ y such that (a, b) = ax + by, in accordance with
+ Bezout's identity.
+
+hex2dec.c Converts hexadecimal to decimal
+
+invmod.c Computes modular inverses
+
+isprime.c Performs the Rabin-Miller probabilistic primality
+ test on a number. Values which fail this test are
+ definitely composite, and those which pass are very
+ likely to be prime (although there are no guarantees)
+
+lap.c Computes the order (least annihilating power) of
+ a value v modulo m. Very dumb algorithm.
+
+primegen.c Generates large (probable) primes.
+
+prng.c A pseudo-random number generator based on the
+ BBS generator code in 'bbs_rand.c'
+
+sieve.c Implements the Sieve of Eratosthenes, using a big
+ bitmap, to generate a list of prime numbers.
+
+fact.c Computes the factorial of an arbitrary precision
+ integer (iterative).
+
+exptmod.c Computes arbitrary precision modular exponentiation
+ from the command line (exptmod a b m -> a^b (mod m))
+
+Most of these can be built from the Makefile that comes with the
+library. Try 'make tools', if your environment supports it.
+
+
+Acknowledgements
+----------------
+
+The algorithms used in this library were drawn primarily from Volume
+2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
+"Semi-Numerical Methods". Barrett's algorithm for modular reduction
+came from Menezes, Oorschot, and Vanstone's _Handbook of Applied
+Cryptography_, Chapter 14.
+
+Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
+bug in mp_read_raw() that made things break on platforms which use
+signed chars.
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger. You can contact
+the author as follows:
+
+E-mail: <sting@linguist.dartmouth.edu>
+
+Postal: 8000 Cummings Hall, Thayer School of Engineering
+ Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
+ 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
+
+Last updated: 16-Jan-2000
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE
new file mode 100644
index 0000000000..35cca68ce9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE
@@ -0,0 +1,11 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+basecvt.pod
+gcd.pod
+invmod.pod
+isprime.pod
+lap.pod
+mpi-test.pod
+prime.txt
+prng.pod
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
new file mode 100644
index 0000000000..41dc2327f1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod
new file mode 100644
index 0000000000..c3d87fbc7e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod
@@ -0,0 +1,65 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ basecvt - radix conversion for arbitrary precision integers
+
+=head1 SYNOPSIS
+
+ basecvt <ibase> <obase> [values]
+
+=head1 DESCRIPTION
+
+The B<basecvt> program is a command-line tool for converting integers
+of arbitrary precision from one radix to another. The current version
+supports radix values from 2 (binary) to 64, inclusive. The first two
+command line arguments specify the input and output radix, in base 10.
+Any further arguments are taken to be integers notated in the input
+radix, and these are converted to the output radix. The output is
+written, one integer per line, to standard output.
+
+When reading integers, only digits considered "valid" for the input
+radix are considered. Processing of an integer terminates when an
+invalid input digit is encountered. So, for example, if you set the
+input radix to 10 and enter '10ACF', B<basecvt> would assume that you
+had entered '10' and ignore the rest of the string.
+
+If no values are provided, no output is written, but the program
+simply terminates with a zero exit status. Error diagnostics are
+written to standard error in the event of out-of-range radix
+specifications. Regardless of the actual values of the input and
+output radix, the radix arguments are taken to be in base 10 (decimal)
+notation.
+
+=head1 DIGITS
+
+For radices from 2-10, standard ASCII decimal digits 0-9 are used for
+both input and output. For radices from 11-36, the ASCII letters A-Z
+are also included, following the convention used in hexadecimal. In
+this range, input is accepted in either upper or lower case, although
+on output only lower-case letters are used.
+
+For radices from 37-62, both upper- and lower-case ASCII letters are
+used, and case is significant for both input and output values.
+
+For radices 63 and 64, the characters '+' (plus) and '/' (forward
+solidus) are also used. These are derived from the MIME base64
+encoding scheme. The overall encoding is not the same as base64,
+because the ASCII digits are used for the bottom of the range, and the
+letters are shifted upward; however, the output will consist of the
+same character set.
+
+This input and output behaviour is inherited from the MPI library used
+by B<basecvt>, and so is not configurable at runtime.
+
+=head1 SEE ALSO
+
+ dec2hex(1), hex2dec(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build
new file mode 100755
index 0000000000..4d75b1e5a2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/build
@@ -0,0 +1,30 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+VERS="1.7p6"
+SECT="1"
+NAME="MPI Tools"
+
+echo "Building manual pages ..."
+case $# in
+ 0)
+ files=`ls *.pod`
+ ;;
+ *)
+ files=$*
+ ;;
+esac
+
+for name in $files
+do
+ echo -n "$name ... "
+# sname=`noext $name`
+ sname=`basename $name .pod`
+ pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT
+ echo "(done)"
+done
+
+echo "Finished building."
+
diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt
new file mode 100644
index 0000000000..c13fb6ef18
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/div.txt
@@ -0,0 +1,64 @@
+Division
+
+This describes the division algorithm used by the MPI library.
+
+Input: a, b; a > b
+Compute: Q, R; a = Qb + R
+
+The input numbers are normalized so that the high-order digit of b is
+at least half the radix. This guarantees that we have a reasonable
+way to guess at the digits of the quotient (this method was taken from
+Knuth, vol. 2, with adaptations).
+
+To normalize, test the high-order digit of b. If it is less than half
+the radix, multiply both a and b by d, where:
+
+ radix - 1
+ d = -----------
+ bmax + 1
+
+...where bmax is the high-order digit of b. Otherwise, set d = 1.
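+
+For example, in radix 10, if b = 37 then bmax = 3, which is less than
+half the radix; so d = 9 / 4 = 2 (integer division), and we scale to
+a' = 2a and b' = 74, whose leading digit 7 is at least half the radix.
+Scaling both operands leaves the quotient unchanged, but makes the
+remainder d times too large, which is why the final step divides d
+back out of R.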
+
+Given normalized values for a and b, let the notation a[n] denote the
+nth digit of a. Let #a be the number of significant figures of a (not
+including any leading zeroes).
+
+ Let R = 0
+ Let p = #a - 1
+
+ while(p >= 0)
+ do
+ R = (R * radix) + a[p]
+ p = p - 1
+ while(R < b and p >= 0)
+
+ if(R < b)
+ break
+
+ q = (R[#R - 1] * radix) + R[#R - 2]
+ q = q / b[#b - 1]
+
+ T = b * q
+
+ while(T > R)
+ q = q - 1
+ T = T - b
+ endwhile
+
+ R = R - T
+
+ Q = (Q * radix) + q
+
+ endwhile
+
+At this point, Q is the quotient, and R is the normalized remainder.
+To denormalize R, compute:
+
+ R = (R / d)
+
+At this point, you are finished.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt
new file mode 100644
index 0000000000..bd9d6f1960
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/expt.txt
@@ -0,0 +1,94 @@
+Exponentiation
+
+For exponentiation, the MPI library uses a simple and fairly standard
+square-and-multiply method. The algorithm is this:
+
+Input: a, b
+Output: a ** b
+
+ s = 1
+
+ while(b != 0)
+ if(b is odd)
+ s = s * a
+ endif
+
+ b = b / 2
+
+ a = a * a
+ endwhile
+
+ return s
+
+The modular exponentiation is done the same way, except replacing:
+
+ s = s * a
+
+with
+ s = (s * a) mod m
+
+and replacing
+
+ a = a * a
+
+with
+
+ a = (a * a) mod m
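+
+Using the functions described in the README, the modular loop can be
+sketched in C like this (a sketch only, not the library's actual
+implementation; s, a, b, and m are initialized mp_int values, a and b
+are clobbered, and error checking is omitted):
+
+    mp_set(&s, 1);
+
+    while (mp_cmp_z(&b) != 0) {
+        if (mp_isodd(&b))
+            mp_mulmod(&s, &a, &m, &s); /* s = (s * a) mod m */
+
+        mp_div_2(&b, &b);              /* b = b / 2         */
+        mp_sqrmod(&a, &m, &a);         /* a = (a * a) mod m */
+    }
+
+    /* s now holds the result */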
+
+Here is a sample exponentiation using the MPI library, as compared to
+the same problem solved by the Unix 'bc' program on my system:
+
+Computation of 2,381,283 ** 235
+
+'bc' says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+MPI says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+Diff says:
+% diff bc.txt mp.txt
+%
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod
new file mode 100644
index 0000000000..b5b8fa34fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/gcd.pod
@@ -0,0 +1,28 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ gcd - compute greatest common divisor of two integers
+
+=head1 SYNOPSIS
+
+ gcd <a> <b>
+
+=head1 DESCRIPTION
+
+The B<gcd> program computes the greatest common divisor of two
+arbitrary-precision integers I<a> and I<b>. The result is written in
+standard decimal notation to the standard output.
+
+If I<b> is zero, B<gcd> will print an error message and exit.
+
+=head1 SEE ALSO
+
+invmod(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod
new file mode 100644
index 0000000000..0194f44884
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/invmod.pod
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ invmod - compute modular inverse of an integer
+
+=head1 SYNOPSIS
+
+ invmod <a> <m>
+
+=head1 DESCRIPTION
+
+The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
+that inverse exists. Both I<a> and I<m> are arbitrary-precision
+integers in decimal notation. The result is written in standard
+decimal notation to the standard output.
+
+If there is no inverse, the message:
+
+ No inverse
+
+...will be printed to the standard output (an inverse exists if and
+only if the greatest common divisor of I<a> and I<m> is 1).
+
+=head1 SEE ALSO
+
+gcd(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod
new file mode 100644
index 0000000000..a8ec1f7ee3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/isprime.pod
@@ -0,0 +1,63 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ isprime - probabilistic primality testing
+
+=head1 SYNOPSIS
+
+ isprime <a>
+
+=head1 DESCRIPTION
+
+The B<isprime> program attempts to determine whether the arbitrary
+precision integer I<a> is prime. It first tests I<a> for divisibility
+by the first 170 or so small primes, and assuming I<a> is not
+divisible by any of these, applies 15 iterations of the Rabin-Miller
+probabilistic primality test.
+
+If the program discovers that the number is composite, it will print:
+
+ Not prime (reason)
+
+Where I<reason> is either:
+
+ divisible by small prime x
+
+Or:
+
+ failed nth pseudoprime test
+
+In the first case, I<x> indicates the first small prime factor that
+was found. In the second case, I<n> indicates which of the
+pseudoprime tests failed (numbered from 1)
+
+If this happens, the number is definitely not prime. However, if the
+number succeeds, this message results:
+
+ Probably prime, 1 in 4^15 chance of false positive
+
+If this happens, the number is prime with very high probability, but
+its primality has not been absolutely proven, only demonstrated to a
+very convincing degree.
+
+The value I<a> can be input in standard decimal notation, or, if it is
+prefixed with I<0x>, it will be read as hexadecimal.
+
+=head1 ENVIRONMENT
+
+You can control how many iterations of Rabin-Miller are performed on
+the candidate number by setting the I<RM_TESTS> environment variable
+to an integer value before starting up B<isprime>. This will change
+the output slightly if the number passes all the tests.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod
new file mode 100644
index 0000000000..47539fbbf9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/lap.pod
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ lap - compute least annihilating power of a number
+
+=head1 SYNOPSIS
+
+ lap <a> <m>
+
+=head1 DESCRIPTION
+
+The B<lap> program computes the order of I<a> modulo I<m>, for
+arbitrary precision integers I<a> and I<m>. The B<order> of I<a>
+modulo I<m> is defined as the smallest positive value I<n> for which
+I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The
+order may not exist, if I<m> is composite.
+
+=head1 RESTRICTIONS
+
+This program is very slow, especially for large moduli. It is
+intended as a way to help find primitive elements in a modular field,
+but it does not do so in a particularly efficient manner. It was
+written simply to help verify that a particular candidate does not
+have an obviously short cycle mod I<m>.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), isprime(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
new file mode 100644
index 0000000000..b05f866e5e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ mpi-test - automated test program for MPI library
+
+=head1 SYNOPSIS
+
+ mpi-test <suite-name> [quiet]
+ mpi-test list
+ mpi-test help
+
+=head1 DESCRIPTION
+
+The B<mpi-test> program is a general unit test driver for the MPI
+library. It is used to verify that the library works as it is
+supposed to on your architecture. As with most such things, passing
+all the tests in B<mpi-test> does not guarantee the code is correct,
+but if any of them fail, there are certainly problems.
+
+Each major function of the library can be tested individually. For a
+list of the test suites understood by B<mpi-test>, run it with the
+I<list> command line option:
+
+ mpi-test list
+
+This will display a list of the available test suites and a brief
+synopsis of what each one does. For a brief overview of this
+document, run B<mpi-test> I<help>.
+
+B<mpi-test> exits with a zero status if the selected test succeeds, or
+a nonzero status if it fails. If a I<suite-name> which is not
+understood by B<mpi-test> is given, a diagnostic is printed to the
+standard error, and the program exits with a result code of 2. If a
+test fails, the result code will be 1, and a diagnostic is ordinarily
+printed to the standard error. However, if the I<quiet> option is
+provided, these diagnostics will be suppressed.
+
+=head1 RESTRICTIONS
+
+Only a few canned test cases are provided. The solutions have been
+verified using the GNU bc(1) program, so bugs there may cause problems
+here; however, this is very unlikely, so if a test fails, it is almost
+certainly my fault, not bc(1)'s.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt
new file mode 100644
index 0000000000..975f56ddbe
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mul.txt
@@ -0,0 +1,77 @@
+Multiplication
+
+This describes the multiplication algorithm used by the MPI library.
+
+This is basically a standard "schoolbook" algorithm. It is slow --
+O(mn) for m = #a, n = #b -- but easy to implement and verify.
+Basically, we run two nested loops, as illustrated here (R is the
+radix):
+
+k = 0
+for j <- 0 to (#b - 1)
+ for i <- 0 to (#a - 1)
+ w = (a[i] * b[j]) + k + c[i+j]
+ c[i+j] = w mod R
+ k = w div R
+ endfor
+ c[i+j] = k;
+ k = 0;
+endfor
+
+It is necessary that 'w' have room for at least two radix R digits.
+The product of any two digits in radix R is at most:
+
+ (R - 1)(R - 1) = R^2 - 2R + 1
+
+Since a two-digit radix-R number can hold R^2 - 1 distinct values,
+this insures that the product will fit into the two-digit register.
+
+To insure that two digits is enough for w, we must also show that
+there is room for the carry-in from the previous multiplication, and
+the current value of the product digit that is being recomputed.
+Assuming each of these may be as big as R - 1 (and no larger,
+certainly), two digits will be enough if and only if:
+
+ (R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
+
+Solving this equation shows that, indeed, this is the case:
+
+ R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
+
+ R^2 - 1 <= R^2 - 1
+
+This suggests that a good radix would be one more than the largest
+value that can be held in half a machine word -- so, for example, as
+in this implementation, where we used a radix of 65536 on a machine
+with 4-byte words. Another advantage of a radix of this sort is that
+binary-level operations are easy on numbers in this representation.
+
+Here's an example multiplication worked out longhand in radix-10,
+using the above algorithm:
+
+ a = 999
+ b = x 999
+ -------------
+ p = 998001
+
+w = (a[jx] * b[ix]) + kin + c[ix + jx]
+c[ix+jx] = w % RADIX
+k = w / RADIX
+
+                                            product
+ix  jx  a[jx]  b[ix]  kin  w       c[i+j]  kout    000000
+0   0   9      9      0    81+0+0  1       8       000001
+0   1   9      9      8    81+8+0  9       8       000091
+0   2   9      9      8    81+8+0  9       8       000991
+                                   8       0       008991
+1   0   9      9      0    81+0+9  0       9       008901
+1   1   9      9      9    81+9+9  9       9       008901
+1   2   9      9      9    81+9+8  8       9       008901
+                                   9       0       098901
+2   0   9      9      0    81+0+9  0       9       098001
+2   1   9      9      9    81+9+8  8       9       098001
+2   2   9      9      9    81+9+9  9       9       098001
+                                   9       0       998001
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt
new file mode 100644
index 0000000000..a6ef91137f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/pi.txt
@@ -0,0 +1,53 @@
+This file describes how pi is computed by the program in 'pi.c' (see
+the utils subdirectory).
+
+Basically, we use Machin's formula, which is what everyone in the
+world uses as a simple method for computing approximations to pi.
+This works for up to a few thousand digits without too much effort.
+Beyond that, though, it gets too slow.
+
+Machin's formula states:
+
+ pi := 16 * arctan(1/5) - 4 * arctan(1/239)
+
+We compute this in integer arithmetic by first multiplying everything
+through by 10^d, where 'd' is the number of digits of pi we wanted to
+compute. It turns out that the last few digits will be wrong, but the
+number of digits that are wrong is usually very small (ordinarily 2-3).
+Having done this, we compute the arctan() function using the formula:
+
+ 1 1 1 1 1
+ arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
+ x 3 x^3 5 x^5 7 x^7 9 x^9
+
+This is done iteratively by computing the first term manually, and
+then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the
+current figure. This is then added to (or subtracted from) a running
+sum, as appropriate. The iteration continues until we overflow our
+available precision and the current figure goes to zero under integer
+division. At that point, we're finished.
+
+Actually, we get a couple extra bits of precision out of the fact that
+we know we're computing y * arctan(1/x), by setting up the multiplier
+as:
+
+ y * 10^d
+
+... instead of just 10^d. There is also a bit of cleverness in how
+the loop is constructed, to avoid special-casing the first term.
+Check out the code for arctan() in 'pi.c', if you are interested in
+seeing how it is set up.
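+
+The inner loop can be sketched with MPI calls roughly as follows.
+This is a sketch only, not the actual code in 'pi.c'; it assumes
+'mult' is an initialized mp_int holding y * 10^d, and that x (5 or
+239 in Machin's formula) and the divisor k fit in an mp_digit:
+
+    mp_int w, t, sum;
+    mp_digit k = 1;
+    int sign = 1;
+
+    mp_init(&w); mp_init(&t); mp_init(&sum);
+
+    mp_div_d(&mult, x, &w, NULL);  /* w = (y * 10^d) / x    */
+    mp_copy(&w, &sum);             /* first term: w / 1     */
+
+    for (;;) {
+        mp_div_d(&w, x, &w, NULL); /* divide x^2 out of w   */
+        mp_div_d(&w, x, &w, NULL);
+        k += 2;
+
+        mp_div_d(&w, k, &t, NULL); /* current term: w / k   */
+        if (mp_cmp_z(&t) == 0)
+            break;                 /* figure is zero: done  */
+
+        sign = -sign;
+        if (sign > 0)
+            mp_add(&sum, &t, &sum);
+        else
+            mp_sub(&sum, &t, &sum);
+    }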
+
+Thanks to Jason P. for this algorithm, which I assembled from notes
+and programs found on his cool "Pile of Pi Programs" page, at:
+
+ http://www.isr.umd.edu/~jasonp/pipage.html
+
+Thanks also to Henrik Johansson <Henrik.Johansson@Nexus.Comm.SE>, from
+whose pi program I borrowed the clever idea of pre-multiplying by x in
+order to avoid a special case on the loop iteration.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt
new file mode 100644
index 0000000000..694797d5f3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prime.txt
@@ -0,0 +1,6542 @@
+2
+3
+5
+7
+11
+13
+17
+19
+23
+29
+31
+37
+41
+43
+47
+53
+59
+61
+67
+71
+73
+79
+83
+89
+97
+101
+103
+107
+109
+113
+127
+131
+137
+139
+149
+151
+157
+163
+167
+173
+179
+181
+191
+193
+197
+199
+211
+223
+227
+229
+233
+239
+241
+251
+257
+263
+269
+271
+277
+281
+283
+293
+307
+311
+313
+317
+331
+337
+347
+349
+353
+359
+367
+373
+379
+383
+389
+397
+401
+409
+419
+421
+431
+433
+439
+443
+449
+457
+461
+463
+467
+479
+487
+491
+499
+503
+509
+521
+523
+541
+547
+557
+563
+569
+571
+577
+587
+593
+599
+601
+607
+613
+617
+619
+631
+641
+643
+647
+653
+659
+661
+673
+677
+683
+691
+701
+709
+719
+727
+733
+739
+743
+751
+757
+761
+769
+773
+787
+797
+809
+811
+821
+823
+827
+829
+839
+853
+857
+859
+863
+877
+881
+883
+887
+907
+911
+919
+929
+937
+941
+947
+953
+967
+971
+977
+983
+991
+997
+1009
+1013
+1019
+1021
+1031
+1033
+1039
+1049
+1051
+1061
+1063
+1069
+1087
+1091
+1093
+1097
+1103
+1109
+1117
+1123
+1129
+1151
+1153
+1163
+1171
+1181
+1187
+1193
+1201
+1213
+1217
+1223
+1229
+1231
+1237
+1249
+1259
+1277
+1279
+1283
+1289
+1291
+1297
+1301
+1303
+1307
+1319
+1321
+1327
+1361
+1367
+1373
+1381
+1399
+1409
+1423
+1427
+1429
+1433
+1439
+1447
+1451
+1453
+1459
+1471
+1481
+1483
+1487
+1489
+1493
+1499
+1511
+1523
+1531
+1543
+1549
+1553
+1559
+1567
+1571
+1579
+1583
+1597
+1601
+1607
+1609
+1613
+1619
+1621
+1627
+1637
+1657
+1663
+1667
+1669
+1693
+1697
+1699
+1709
+1721
+1723
+1733
+1741
+1747
+1753
+1759
+1777
+1783
+1787
+1789
+1801
+1811
+1823
+1831
+1847
+1861
+1867
+1871
+1873
+1877
+1879
+1889
+1901
+1907
+1913
+1931
+1933
+1949
+1951
+1973
+1979
+1987
+1993
+1997
+1999
+2003
+2011
+2017
+2027
+2029
+2039
+2053
+2063
+2069
+2081
+2083
+2087
+2089
+2099
+2111
+2113
+2129
+2131
+2137
+2141
+2143
+2153
+2161
+2179
+2203
+2207
+2213
+2221
+2237
+2239
+2243
+2251
+2267
+2269
+2273
+2281
+2287
+2293
+2297
+2309
+2311
+2333
+2339
+2341
+2347
+2351
+2357
+2371
+2377
+2381
+2383
+2389
+2393
+2399
+2411
+2417
+2423
+2437
+2441
+2447
+2459
+2467
+2473
+2477
+2503
+2521
+2531
+2539
+2543
+2549
+2551
+2557
+2579
+2591
+2593
+2609
+2617
+2621
+2633
+2647
+2657
+2659
+2663
+2671
+2677
+2683
+2687
+2689
+2693
+2699
+2707
+2711
+2713
+2719
+2729
+2731
+2741
+2749
+2753
+2767
+2777
+2789
+2791
+2797
+2801
+2803
+2819
+2833
+2837
+2843
+2851
+2857
+2861
+2879
+2887
+2897
+2903
+2909
+2917
+2927
+2939
+2953
+2957
+2963
+2969
+2971
+2999
+3001
+3011
+3019
+3023
+3037
+3041
+3049
+3061
+3067
+3079
+3083
+3089
+3109
+3119
+3121
+3137
+3163
+3167
+3169
+3181
+3187
+3191
+3203
+3209
+3217
+3221
+3229
+3251
+3253
+3257
+3259
+3271
+3299
+3301
+3307
+3313
+3319
+3323
+3329
+3331
+3343
+3347
+3359
+3361
+3371
+3373
+3389
+3391
+3407
+3413
+3433
+3449
+3457
+3461
+3463
+3467
+3469
+3491
+3499
+3511
+3517
+3527
+3529
+3533
+3539
+3541
+3547
+3557
+3559
+3571
+3581
+3583
+3593
+3607
+3613
+3617
+3623
+3631
+3637
+3643
+3659
+3671
+3673
+3677
+3691
+3697
+3701
+3709
+3719
+3727
+3733
+3739
+3761
+3767
+3769
+3779
+3793
+3797
+3803
+3821
+3823
+3833
+3847
+3851
+3853
+3863
+3877
+3881
+3889
+3907
+3911
+3917
+3919
+3923
+3929
+3931
+3943
+3947
+3967
+3989
+4001
+4003
+4007
+4013
+4019
+4021
+4027
+4049
+4051
+4057
+4073
+4079
+4091
+4093
+4099
+4111
+4127
+4129
+4133
+4139
+4153
+4157
+4159
+4177
+4201
+4211
+4217
+4219
+4229
+4231
+4241
+4243
+4253
+4259
+4261
+4271
+4273
+4283
+4289
+4297
+4327
+4337
+4339
+4349
+4357
+4363
+4373
+4391
+4397
+4409
+4421
+4423
+4441
+4447
+4451
+4457
+4463
+4481
+4483
+4493
+4507
+4513
+4517
+4519
+4523
+4547
+4549
+4561
+4567
+4583
+4591
+4597
+4603
+4621
+4637
+4639
+4643
+4649
+4651
+4657
+4663
+4673
+4679
+4691
+4703
+4721
+4723
+4729
+4733
+4751
+4759
+4783
+4787
+4789
+4793
+4799
+4801
+4813
+4817
+4831
+4861
+4871
+4877
+4889
+4903
+4909
+4919
+4931
+4933
+4937
+4943
+4951
+4957
+4967
+4969
+4973
+4987
+4993
+4999
+5003
+5009
+5011
+5021
+5023
+5039
+5051
+5059
+5077
+5081
+5087
+5099
+5101
+5107
+5113
+5119
+5147
+5153
+5167
+5171
+5179
+5189
+5197
+5209
+5227
+5231
+5233
+5237
+5261
+5273
+5279
+5281
+5297
+5303
+5309
+5323
+5333
+5347
+5351
+5381
+5387
+5393
+5399
+5407
+5413
+5417
+5419
+5431
+5437
+5441
+5443
+5449
+5471
+5477
+5479
+5483
+5501
+5503
+5507
+5519
+5521
+5527
+5531
+5557
+5563
+5569
+5573
+5581
+5591
+5623
+5639
+5641
+5647
+5651
+5653
+5657
+5659
+5669
+5683
+5689
+5693
+5701
+5711
+5717
+5737
+5741
+5743
+5749
+5779
+5783
+5791
+5801
+5807
+5813
+5821
+5827
+5839
+5843
+5849
+5851
+5857
+5861
+5867
+5869
+5879
+5881
+5897
+5903
+5923
+5927
+5939
+5953
+5981
+5987
+6007
+6011
+6029
+6037
+6043
+6047
+6053
+6067
+6073
+6079
+6089
+6091
+6101
+6113
+6121
+6131
+6133
+6143
+6151
+6163
+6173
+6197
+6199
+6203
+6211
+6217
+6221
+6229
+6247
+6257
+6263
+6269
+6271
+6277
+6287
+6299
+6301
+6311
+6317
+6323
+6329
+6337
+6343
+6353
+6359
+6361
+6367
+6373
+6379
+6389
+6397
+6421
+6427
+6449
+6451
+6469
+6473
+6481
+6491
+6521
+6529
+6547
+6551
+6553
+6563
+6569
+6571
+6577
+6581
+6599
+6607
+6619
+6637
+6653
+6659
+6661
+6673
+6679
+6689
+6691
+6701
+6703
+6709
+6719
+6733
+6737
+6761
+6763
+6779
+6781
+6791
+6793
+6803
+6823
+6827
+6829
+6833
+6841
+6857
+6863
+6869
+6871
+6883
+6899
+6907
+6911
+6917
+6947
+6949
+6959
+6961
+6967
+6971
+6977
+6983
+6991
+6997
+7001
+7013
+7019
+7027
+7039
+7043
+7057
+7069
+7079
+7103
+7109
+7121
+7127
+7129
+7151
+7159
+7177
+7187
+7193
+7207
+7211
+7213
+7219
+7229
+7237
+7243
+7247
+7253
+7283
+7297
+7307
+7309
+7321
+7331
+7333
+7349
+7351
+7369
+7393
+7411
+7417
+7433
+7451
+7457
+7459
+7477
+7481
+7487
+7489
+7499
+7507
+7517
+7523
+7529
+7537
+7541
+7547
+7549
+7559
+7561
+7573
+7577
+7583
+7589
+7591
+7603
+7607
+7621
+7639
+7643
+7649
+7669
+7673
+7681
+7687
+7691
+7699
+7703
+7717
+7723
+7727
+7741
+7753
+7757
+7759
+7789
+7793
+7817
+7823
+7829
+7841
+7853
+7867
+7873
+7877
+7879
+7883
+7901
+7907
+7919
+7927
+7933
+7937
+7949
+7951
+7963
+7993
+8009
+8011
+8017
+8039
+8053
+8059
+8069
+8081
+8087
+8089
+8093
+8101
+8111
+8117
+8123
+8147
+8161
+8167
+8171
+8179
+8191
+8209
+8219
+8221
+8231
+8233
+8237
+8243
+8263
+8269
+8273
+8287
+8291
+8293
+8297
+8311
+8317
+8329
+8353
+8363
+8369
+8377
+8387
+8389
+8419
+8423
+8429
+8431
+8443
+8447
+8461
+8467
+8501
+8513
+8521
+8527
+8537
+8539
+8543
+8563
+8573
+8581
+8597
+8599
+8609
+8623
+8627
+8629
+8641
+8647
+8663
+8669
+8677
+8681
+8689
+8693
+8699
+8707
+8713
+8719
+8731
+8737
+8741
+8747
+8753
+8761
+8779
+8783
+8803
+8807
+8819
+8821
+8831
+8837
+8839
+8849
+8861
+8863
+8867
+8887
+8893
+8923
+8929
+8933
+8941
+8951
+8963
+8969
+8971
+8999
+9001
+9007
+9011
+9013
+9029
+9041
+9043
+9049
+9059
+9067
+9091
+9103
+9109
+9127
+9133
+9137
+9151
+9157
+9161
+9173
+9181
+9187
+9199
+9203
+9209
+9221
+9227
+9239
+9241
+9257
+9277
+9281
+9283
+9293
+9311
+9319
+9323
+9337
+9341
+9343
+9349
+9371
+9377
+9391
+9397
+9403
+9413
+9419
+9421
+9431
+9433
+9437
+9439
+9461
+9463
+9467
+9473
+9479
+9491
+9497
+9511
+9521
+9533
+9539
+9547
+9551
+9587
+9601
+9613
+9619
+9623
+9629
+9631
+9643
+9649
+9661
+9677
+9679
+9689
+9697
+9719
+9721
+9733
+9739
+9743
+9749
+9767
+9769
+9781
+9787
+9791
+9803
+9811
+9817
+9829
+9833
+9839
+9851
+9857
+9859
+9871
+9883
+9887
+9901
+9907
+9923
+9929
+9931
+9941
+9949
+9967
+9973
+10007
+10009
+10037
+10039
+10061
+10067
+10069
+10079
+10091
+10093
+10099
+10103
+10111
+10133
+10139
+10141
+10151
+10159
+10163
+10169
+10177
+10181
+10193
+10211
+10223
+10243
+10247
+10253
+10259
+10267
+10271
+10273
+10289
+10301
+10303
+10313
+10321
+10331
+10333
+10337
+10343
+10357
+10369
+10391
+10399
+10427
+10429
+10433
+10453
+10457
+10459
+10463
+10477
+10487
+10499
+10501
+10513
+10529
+10531
+10559
+10567
+10589
+10597
+10601
+10607
+10613
+10627
+10631
+10639
+10651
+10657
+10663
+10667
+10687
+10691
+10709
+10711
+10723
+10729
+10733
+10739
+10753
+10771
+10781
+10789
+10799
+10831
+10837
+10847
+10853
+10859
+10861
+10867
+10883
+10889
+10891
+10903
+10909
+10937
+10939
+10949
+10957
+10973
+10979
+10987
+10993
+11003
+11027
+11047
+11057
+11059
+11069
+11071
+11083
+11087
+11093
+11113
+11117
+11119
+11131
+11149
+11159
+11161
+11171
+11173
+11177
+11197
+11213
+11239
+11243
+11251
+11257
+11261
+11273
+11279
+11287
+11299
+11311
+11317
+11321
+11329
+11351
+11353
+11369
+11383
+11393
+11399
+11411
+11423
+11437
+11443
+11447
+11467
+11471
+11483
+11489
+11491
+11497
+11503
+11519
+11527
+11549
+11551
+11579
+11587
+11593
+11597
+11617
+11621
+11633
+11657
+11677
+11681
+11689
+11699
+11701
+11717
+11719
+11731
+11743
+11777
+11779
+11783
+11789
+11801
+11807
+11813
+11821
+11827
+11831
+11833
+11839
+11863
+11867
+11887
+11897
+11903
+11909
+11923
+11927
+11933
+11939
+11941
+11953
+11959
+11969
+11971
+11981
+11987
+12007
+12011
+12037
+12041
+12043
+12049
+12071
+12073
+12097
+12101
+12107
+12109
+12113
+12119
+12143
+12149
+12157
+12161
+12163
+12197
+12203
+12211
+12227
+12239
+12241
+12251
+12253
+12263
+12269
+12277
+12281
+12289
+12301
+12323
+12329
+12343
+12347
+12373
+12377
+12379
+12391
+12401
+12409
+12413
+12421
+12433
+12437
+12451
+12457
+12473
+12479
+12487
+12491
+12497
+12503
+12511
+12517
+12527
+12539
+12541
+12547
+12553
+12569
+12577
+12583
+12589
+12601
+12611
+12613
+12619
+12637
+12641
+12647
+12653
+12659
+12671
+12689
+12697
+12703
+12713
+12721
+12739
+12743
+12757
+12763
+12781
+12791
+12799
+12809
+12821
+12823
+12829
+12841
+12853
+12889
+12893
+12899
+12907
+12911
+12917
+12919
+12923
+12941
+12953
+12959
+12967
+12973
+12979
+12983
+13001
+13003
+13007
+13009
+13033
+13037
+13043
+13049
+13063
+13093
+13099
+13103
+13109
+13121
+13127
+13147
+13151
+13159
+13163
+13171
+13177
+13183
+13187
+13217
+13219
+13229
+13241
+13249
+13259
+13267
+13291
+13297
+13309
+13313
+13327
+13331
+13337
+13339
+13367
+13381
+13397
+13399
+13411
+13417
+13421
+13441
+13451
+13457
+13463
+13469
+13477
+13487
+13499
+13513
+13523
+13537
+13553
+13567
+13577
+13591
+13597
+13613
+13619
+13627
+13633
+13649
+13669
+13679
+13681
+13687
+13691
+13693
+13697
+13709
+13711
+13721
+13723
+13729
+13751
+13757
+13759
+13763
+13781
+13789
+13799
+13807
+13829
+13831
+13841
+13859
+13873
+13877
+13879
+13883
+13901
+13903
+13907
+13913
+13921
+13931
+13933
+13963
+13967
+13997
+13999
+14009
+14011
+14029
+14033
+14051
+14057
+14071
+14081
+14083
+14087
+14107
+14143
+14149
+14153
+14159
+14173
+14177
+14197
+14207
+14221
+14243
+14249
+14251
+14281
+14293
+14303
+14321
+14323
+14327
+14341
+14347
+14369
+14387
+14389
+14401
+14407
+14411
+14419
+14423
+14431
+14437
+14447
+14449
+14461
+14479
+14489
+14503
+14519
+14533
+14537
+14543
+14549
+14551
+14557
+14561
+14563
+14591
+14593
+14621
+14627
+14629
+14633
+14639
+14653
+14657
+14669
+14683
+14699
+14713
+14717
+14723
+14731
+14737
+14741
+14747
+14753
+14759
+14767
+14771
+14779
+14783
+14797
+14813
+14821
+14827
+14831
+14843
+14851
+14867
+14869
+14879
+14887
+14891
+14897
+14923
+14929
+14939
+14947
+14951
+14957
+14969
+14983
+15013
+15017
+15031
+15053
+15061
+15073
+15077
+15083
+15091
+15101
+15107
+15121
+15131
+15137
+15139
+15149
+15161
+15173
+15187
+15193
+15199
+15217
+15227
+15233
+15241
+15259
+15263
+15269
+15271
+15277
+15287
+15289
+15299
+15307
+15313
+15319
+15329
+15331
+15349
+15359
+15361
+15373
+15377
+15383
+15391
+15401
+15413
+15427
+15439
+15443
+15451
+15461
+15467
+15473
+15493
+15497
+15511
+15527
+15541
+15551
+15559
+15569
+15581
+15583
+15601
+15607
+15619
+15629
+15641
+15643
+15647
+15649
+15661
+15667
+15671
+15679
+15683
+15727
+15731
+15733
+15737
+15739
+15749
+15761
+15767
+15773
+15787
+15791
+15797
+15803
+15809
+15817
+15823
+15859
+15877
+15881
+15887
+15889
+15901
+15907
+15913
+15919
+15923
+15937
+15959
+15971
+15973
+15991
+16001
+16007
+16033
+16057
+16061
+16063
+16067
+16069
+16073
+16087
+16091
+16097
+16103
+16111
+16127
+16139
+16141
+16183
+16187
+16189
+16193
+16217
+16223
+16229
+16231
+16249
+16253
+16267
+16273
+16301
+16319
+16333
+16339
+16349
+16361
+16363
+16369
+16381
+16411
+16417
+16421
+16427
+16433
+16447
+16451
+16453
+16477
+16481
+16487
+16493
+16519
+16529
+16547
+16553
+16561
+16567
+16573
+16603
+16607
+16619
+16631
+16633
+16649
+16651
+16657
+16661
+16673
+16691
+16693
+16699
+16703
+16729
+16741
+16747
+16759
+16763
+16787
+16811
+16823
+16829
+16831
+16843
+16871
+16879
+16883
+16889
+16901
+16903
+16921
+16927
+16931
+16937
+16943
+16963
+16979
+16981
+16987
+16993
+17011
+17021
+17027
+17029
+17033
+17041
+17047
+17053
+17077
+17093
+17099
+17107
+17117
+17123
+17137
+17159
+17167
+17183
+17189
+17191
+17203
+17207
+17209
+17231
+17239
+17257
+17291
+17293
+17299
+17317
+17321
+17327
+17333
+17341
+17351
+17359
+17377
+17383
+17387
+17389
+17393
+17401
+17417
+17419
+17431
+17443
+17449
+17467
+17471
+17477
+17483
+17489
+17491
+17497
+17509
+17519
+17539
+17551
+17569
+17573
+17579
+17581
+17597
+17599
+17609
+17623
+17627
+17657
+17659
+17669
+17681
+17683
+17707
+17713
+17729
+17737
+17747
+17749
+17761
+17783
+17789
+17791
+17807
+17827
+17837
+17839
+17851
+17863
+17881
+17891
+17903
+17909
+17911
+17921
+17923
+17929
+17939
+17957
+17959
+17971
+17977
+17981
+17987
+17989
+18013
+18041
+18043
+18047
+18049
+18059
+18061
+18077
+18089
+18097
+18119
+18121
+18127
+18131
+18133
+18143
+18149
+18169
+18181
+18191
+18199
+18211
+18217
+18223
+18229
+18233
+18251
+18253
+18257
+18269
+18287
+18289
+18301
+18307
+18311
+18313
+18329
+18341
+18353
+18367
+18371
+18379
+18397
+18401
+18413
+18427
+18433
+18439
+18443
+18451
+18457
+18461
+18481
+18493
+18503
+18517
+18521
+18523
+18539
+18541
+18553
+18583
+18587
+18593
+18617
+18637
+18661
+18671
+18679
+18691
+18701
+18713
+18719
+18731
+18743
+18749
+18757
+18773
+18787
+18793
+18797
+18803
+18839
+18859
+18869
+18899
+18911
+18913
+18917
+18919
+18947
+18959
+18973
+18979
+19001
+19009
+19013
+19031
+19037
+19051
+19069
+19073
+19079
+19081
+19087
+19121
+19139
+19141
+19157
+19163
+19181
+19183
+19207
+19211
+19213
+19219
+19231
+19237
+19249
+19259
+19267
+19273
+19289
+19301
+19309
+19319
+19333
+19373
+19379
+19381
+19387
+19391
+19403
+19417
+19421
+19423
+19427
+19429
+19433
+19441
+19447
+19457
+19463
+19469
+19471
+19477
+19483
+19489
+19501
+19507
+19531
+19541
+19543
+19553
+19559
+19571
+19577
+19583
+19597
+19603
+19609
+19661
+19681
+19687
+19697
+19699
+19709
+19717
+19727
+19739
+19751
+19753
+19759
+19763
+19777
+19793
+19801
+19813
+19819
+19841
+19843
+19853
+19861
+19867
+19889
+19891
+19913
+19919
+19927
+19937
+19949
+19961
+19963
+19973
+19979
+19991
+19993
+19997
+20011
+20021
+20023
+20029
+20047
+20051
+20063
+20071
+20089
+20101
+20107
+20113
+20117
+20123
+20129
+20143
+20147
+20149
+20161
+20173
+20177
+20183
+20201
+20219
+20231
+20233
+20249
+20261
+20269
+20287
+20297
+20323
+20327
+20333
+20341
+20347
+20353
+20357
+20359
+20369
+20389
+20393
+20399
+20407
+20411
+20431
+20441
+20443
+20477
+20479
+20483
+20507
+20509
+20521
+20533
+20543
+20549
+20551
+20563
+20593
+20599
+20611
+20627
+20639
+20641
+20663
+20681
+20693
+20707
+20717
+20719
+20731
+20743
+20747
+20749
+20753
+20759
+20771
+20773
+20789
+20807
+20809
+20849
+20857
+20873
+20879
+20887
+20897
+20899
+20903
+20921
+20929
+20939
+20947
+20959
+20963
+20981
+20983
+21001
+21011
+21013
+21017
+21019
+21023
+21031
+21059
+21061
+21067
+21089
+21101
+21107
+21121
+21139
+21143
+21149
+21157
+21163
+21169
+21179
+21187
+21191
+21193
+21211
+21221
+21227
+21247
+21269
+21277
+21283
+21313
+21317
+21319
+21323
+21341
+21347
+21377
+21379
+21383
+21391
+21397
+21401
+21407
+21419
+21433
+21467
+21481
+21487
+21491
+21493
+21499
+21503
+21517
+21521
+21523
+21529
+21557
+21559
+21563
+21569
+21577
+21587
+21589
+21599
+21601
+21611
+21613
+21617
+21647
+21649
+21661
+21673
+21683
+21701
+21713
+21727
+21737
+21739
+21751
+21757
+21767
+21773
+21787
+21799
+21803
+21817
+21821
+21839
+21841
+21851
+21859
+21863
+21871
+21881
+21893
+21911
+21929
+21937
+21943
+21961
+21977
+21991
+21997
+22003
+22013
+22027
+22031
+22037
+22039
+22051
+22063
+22067
+22073
+22079
+22091
+22093
+22109
+22111
+22123
+22129
+22133
+22147
+22153
+22157
+22159
+22171
+22189
+22193
+22229
+22247
+22259
+22271
+22273
+22277
+22279
+22283
+22291
+22303
+22307
+22343
+22349
+22367
+22369
+22381
+22391
+22397
+22409
+22433
+22441
+22447
+22453
+22469
+22481
+22483
+22501
+22511
+22531
+22541
+22543
+22549
+22567
+22571
+22573
+22613
+22619
+22621
+22637
+22639
+22643
+22651
+22669
+22679
+22691
+22697
+22699
+22709
+22717
+22721
+22727
+22739
+22741
+22751
+22769
+22777
+22783
+22787
+22807
+22811
+22817
+22853
+22859
+22861
+22871
+22877
+22901
+22907
+22921
+22937
+22943
+22961
+22963
+22973
+22993
+23003
+23011
+23017
+23021
+23027
+23029
+23039
+23041
+23053
+23057
+23059
+23063
+23071
+23081
+23087
+23099
+23117
+23131
+23143
+23159
+23167
+23173
+23189
+23197
+23201
+23203
+23209
+23227
+23251
+23269
+23279
+23291
+23293
+23297
+23311
+23321
+23327
+23333
+23339
+23357
+23369
+23371
+23399
+23417
+23431
+23447
+23459
+23473
+23497
+23509
+23531
+23537
+23539
+23549
+23557
+23561
+23563
+23567
+23581
+23593
+23599
+23603
+23609
+23623
+23627
+23629
+23633
+23663
+23669
+23671
+23677
+23687
+23689
+23719
+23741
+23743
+23747
+23753
+23761
+23767
+23773
+23789
+23801
+23813
+23819
+23827
+23831
+23833
+23857
+23869
+23873
+23879
+23887
+23893
+23899
+23909
+23911
+23917
+23929
+23957
+23971
+23977
+23981
+23993
+24001
+24007
+24019
+24023
+24029
+24043
+24049
+24061
+24071
+24077
+24083
+24091
+24097
+24103
+24107
+24109
+24113
+24121
+24133
+24137
+24151
+24169
+24179
+24181
+24197
+24203
+24223
+24229
+24239
+24247
+24251
+24281
+24317
+24329
+24337
+24359
+24371
+24373
+24379
+24391
+24407
+24413
+24419
+24421
+24439
+24443
+24469
+24473
+24481
+24499
+24509
+24517
+24527
+24533
+24547
+24551
+24571
+24593
+24611
+24623
+24631
+24659
+24671
+24677
+24683
+24691
+24697
+24709
+24733
+24749
+24763
+24767
+24781
+24793
+24799
+24809
+24821
+24841
+24847
+24851
+24859
+24877
+24889
+24907
+24917
+24919
+24923
+24943
+24953
+24967
+24971
+24977
+24979
+24989
+25013
+25031
+25033
+25037
+25057
+25073
+25087
+25097
+25111
+25117
+25121
+25127
+25147
+25153
+25163
+25169
+25171
+25183
+25189
+25219
+25229
+25237
+25243
+25247
+25253
+25261
+25301
+25303
+25307
+25309
+25321
+25339
+25343
+25349
+25357
+25367
+25373
+25391
+25409
+25411
+25423
+25439
+25447
+25453
+25457
+25463
+25469
+25471
+25523
+25537
+25541
+25561
+25577
+25579
+25583
+25589
+25601
+25603
+25609
+25621
+25633
+25639
+25643
+25657
+25667
+25673
+25679
+25693
+25703
+25717
+25733
+25741
+25747
+25759
+25763
+25771
+25793
+25799
+25801
+25819
+25841
+25847
+25849
+25867
+25873
+25889
+25903
+25913
+25919
+25931
+25933
+25939
+25943
+25951
+25969
+25981
+25997
+25999
+26003
+26017
+26021
+26029
+26041
+26053
+26083
+26099
+26107
+26111
+26113
+26119
+26141
+26153
+26161
+26171
+26177
+26183
+26189
+26203
+26209
+26227
+26237
+26249
+26251
+26261
+26263
+26267
+26293
+26297
+26309
+26317
+26321
+26339
+26347
+26357
+26371
+26387
+26393
+26399
+26407
+26417
+26423
+26431
+26437
+26449
+26459
+26479
+26489
+26497
+26501
+26513
+26539
+26557
+26561
+26573
+26591
+26597
+26627
+26633
+26641
+26647
+26669
+26681
+26683
+26687
+26693
+26699
+26701
+26711
+26713
+26717
+26723
+26729
+26731
+26737
+26759
+26777
+26783
+26801
+26813
+26821
+26833
+26839
+26849
+26861
+26863
+26879
+26881
+26891
+26893
+26903
+26921
+26927
+26947
+26951
+26953
+26959
+26981
+26987
+26993
+27011
+27017
+27031
+27043
+27059
+27061
+27067
+27073
+27077
+27091
+27103
+27107
+27109
+27127
+27143
+27179
+27191
+27197
+27211
+27239
+27241
+27253
+27259
+27271
+27277
+27281
+27283
+27299
+27329
+27337
+27361
+27367
+27397
+27407
+27409
+27427
+27431
+27437
+27449
+27457
+27479
+27481
+27487
+27509
+27527
+27529
+27539
+27541
+27551
+27581
+27583
+27611
+27617
+27631
+27647
+27653
+27673
+27689
+27691
+27697
+27701
+27733
+27737
+27739
+27743
+27749
+27751
+27763
+27767
+27773
+27779
+27791
+27793
+27799
+27803
+27809
+27817
+27823
+27827
+27847
+27851
+27883
+27893
+27901
+27917
+27919
+27941
+27943
+27947
+27953
+27961
+27967
+27983
+27997
+28001
+28019
+28027
+28031
+28051
+28057
+28069
+28081
+28087
+28097
+28099
+28109
+28111
+28123
+28151
+28163
+28181
+28183
+28201
+28211
+28219
+28229
+28277
+28279
+28283
+28289
+28297
+28307
+28309
+28319
+28349
+28351
+28387
+28393
+28403
+28409
+28411
+28429
+28433
+28439
+28447
+28463
+28477
+28493
+28499
+28513
+28517
+28537
+28541
+28547
+28549
+28559
+28571
+28573
+28579
+28591
+28597
+28603
+28607
+28619
+28621
+28627
+28631
+28643
+28649
+28657
+28661
+28663
+28669
+28687
+28697
+28703
+28711
+28723
+28729
+28751
+28753
+28759
+28771
+28789
+28793
+28807
+28813
+28817
+28837
+28843
+28859
+28867
+28871
+28879
+28901
+28909
+28921
+28927
+28933
+28949
+28961
+28979
+29009
+29017
+29021
+29023
+29027
+29033
+29059
+29063
+29077
+29101
+29123
+29129
+29131
+29137
+29147
+29153
+29167
+29173
+29179
+29191
+29201
+29207
+29209
+29221
+29231
+29243
+29251
+29269
+29287
+29297
+29303
+29311
+29327
+29333
+29339
+29347
+29363
+29383
+29387
+29389
+29399
+29401
+29411
+29423
+29429
+29437
+29443
+29453
+29473
+29483
+29501
+29527
+29531
+29537
+29567
+29569
+29573
+29581
+29587
+29599
+29611
+29629
+29633
+29641
+29663
+29669
+29671
+29683
+29717
+29723
+29741
+29753
+29759
+29761
+29789
+29803
+29819
+29833
+29837
+29851
+29863
+29867
+29873
+29879
+29881
+29917
+29921
+29927
+29947
+29959
+29983
+29989
+30011
+30013
+30029
+30047
+30059
+30071
+30089
+30091
+30097
+30103
+30109
+30113
+30119
+30133
+30137
+30139
+30161
+30169
+30181
+30187
+30197
+30203
+30211
+30223
+30241
+30253
+30259
+30269
+30271
+30293
+30307
+30313
+30319
+30323
+30341
+30347
+30367
+30389
+30391
+30403
+30427
+30431
+30449
+30467
+30469
+30491
+30493
+30497
+30509
+30517
+30529
+30539
+30553
+30557
+30559
+30577
+30593
+30631
+30637
+30643
+30649
+30661
+30671
+30677
+30689
+30697
+30703
+30707
+30713
+30727
+30757
+30763
+30773
+30781
+30803
+30809
+30817
+30829
+30839
+30841
+30851
+30853
+30859
+30869
+30871
+30881
+30893
+30911
+30931
+30937
+30941
+30949
+30971
+30977
+30983
+31013
+31019
+31033
+31039
+31051
+31063
+31069
+31079
+31081
+31091
+31121
+31123
+31139
+31147
+31151
+31153
+31159
+31177
+31181
+31183
+31189
+31193
+31219
+31223
+31231
+31237
+31247
+31249
+31253
+31259
+31267
+31271
+31277
+31307
+31319
+31321
+31327
+31333
+31337
+31357
+31379
+31387
+31391
+31393
+31397
+31469
+31477
+31481
+31489
+31511
+31513
+31517
+31531
+31541
+31543
+31547
+31567
+31573
+31583
+31601
+31607
+31627
+31643
+31649
+31657
+31663
+31667
+31687
+31699
+31721
+31723
+31727
+31729
+31741
+31751
+31769
+31771
+31793
+31799
+31817
+31847
+31849
+31859
+31873
+31883
+31891
+31907
+31957
+31963
+31973
+31981
+31991
+32003
+32009
+32027
+32029
+32051
+32057
+32059
+32063
+32069
+32077
+32083
+32089
+32099
+32117
+32119
+32141
+32143
+32159
+32173
+32183
+32189
+32191
+32203
+32213
+32233
+32237
+32251
+32257
+32261
+32297
+32299
+32303
+32309
+32321
+32323
+32327
+32341
+32353
+32359
+32363
+32369
+32371
+32377
+32381
+32401
+32411
+32413
+32423
+32429
+32441
+32443
+32467
+32479
+32491
+32497
+32503
+32507
+32531
+32533
+32537
+32561
+32563
+32569
+32573
+32579
+32587
+32603
+32609
+32611
+32621
+32633
+32647
+32653
+32687
+32693
+32707
+32713
+32717
+32719
+32749
+32771
+32779
+32783
+32789
+32797
+32801
+32803
+32831
+32833
+32839
+32843
+32869
+32887
+32909
+32911
+32917
+32933
+32939
+32941
+32957
+32969
+32971
+32983
+32987
+32993
+32999
+33013
+33023
+33029
+33037
+33049
+33053
+33071
+33073
+33083
+33091
+33107
+33113
+33119
+33149
+33151
+33161
+33179
+33181
+33191
+33199
+33203
+33211
+33223
+33247
+33287
+33289
+33301
+33311
+33317
+33329
+33331
+33343
+33347
+33349
+33353
+33359
+33377
+33391
+33403
+33409
+33413
+33427
+33457
+33461
+33469
+33479
+33487
+33493
+33503
+33521
+33529
+33533
+33547
+33563
+33569
+33577
+33581
+33587
+33589
+33599
+33601
+33613
+33617
+33619
+33623
+33629
+33637
+33641
+33647
+33679
+33703
+33713
+33721
+33739
+33749
+33751
+33757
+33767
+33769
+33773
+33791
+33797
+33809
+33811
+33827
+33829
+33851
+33857
+33863
+33871
+33889
+33893
+33911
+33923
+33931
+33937
+33941
+33961
+33967
+33997
+34019
+34031
+34033
+34039
+34057
+34061
+34123
+34127
+34129
+34141
+34147
+34157
+34159
+34171
+34183
+34211
+34213
+34217
+34231
+34253
+34259
+34261
+34267
+34273
+34283
+34297
+34301
+34303
+34313
+34319
+34327
+34337
+34351
+34361
+34367
+34369
+34381
+34403
+34421
+34429
+34439
+34457
+34469
+34471
+34483
+34487
+34499
+34501
+34511
+34513
+34519
+34537
+34543
+34549
+34583
+34589
+34591
+34603
+34607
+34613
+34631
+34649
+34651
+34667
+34673
+34679
+34687
+34693
+34703
+34721
+34729
+34739
+34747
+34757
+34759
+34763
+34781
+34807
+34819
+34841
+34843
+34847
+34849
+34871
+34877
+34883
+34897
+34913
+34919
+34939
+34949
+34961
+34963
+34981
+35023
+35027
+35051
+35053
+35059
+35069
+35081
+35083
+35089
+35099
+35107
+35111
+35117
+35129
+35141
+35149
+35153
+35159
+35171
+35201
+35221
+35227
+35251
+35257
+35267
+35279
+35281
+35291
+35311
+35317
+35323
+35327
+35339
+35353
+35363
+35381
+35393
+35401
+35407
+35419
+35423
+35437
+35447
+35449
+35461
+35491
+35507
+35509
+35521
+35527
+35531
+35533
+35537
+35543
+35569
+35573
+35591
+35593
+35597
+35603
+35617
+35671
+35677
+35729
+35731
+35747
+35753
+35759
+35771
+35797
+35801
+35803
+35809
+35831
+35837
+35839
+35851
+35863
+35869
+35879
+35897
+35899
+35911
+35923
+35933
+35951
+35963
+35969
+35977
+35983
+35993
+35999
+36007
+36011
+36013
+36017
+36037
+36061
+36067
+36073
+36083
+36097
+36107
+36109
+36131
+36137
+36151
+36161
+36187
+36191
+36209
+36217
+36229
+36241
+36251
+36263
+36269
+36277
+36293
+36299
+36307
+36313
+36319
+36341
+36343
+36353
+36373
+36383
+36389
+36433
+36451
+36457
+36467
+36469
+36473
+36479
+36493
+36497
+36523
+36527
+36529
+36541
+36551
+36559
+36563
+36571
+36583
+36587
+36599
+36607
+36629
+36637
+36643
+36653
+36671
+36677
+36683
+36691
+36697
+36709
+36713
+36721
+36739
+36749
+36761
+36767
+36779
+36781
+36787
+36791
+36793
+36809
+36821
+36833
+36847
+36857
+36871
+36877
+36887
+36899
+36901
+36913
+36919
+36923
+36929
+36931
+36943
+36947
+36973
+36979
+36997
+37003
+37013
+37019
+37021
+37039
+37049
+37057
+37061
+37087
+37097
+37117
+37123
+37139
+37159
+37171
+37181
+37189
+37199
+37201
+37217
+37223
+37243
+37253
+37273
+37277
+37307
+37309
+37313
+37321
+37337
+37339
+37357
+37361
+37363
+37369
+37379
+37397
+37409
+37423
+37441
+37447
+37463
+37483
+37489
+37493
+37501
+37507
+37511
+37517
+37529
+37537
+37547
+37549
+37561
+37567
+37571
+37573
+37579
+37589
+37591
+37607
+37619
+37633
+37643
+37649
+37657
+37663
+37691
+37693
+37699
+37717
+37747
+37781
+37783
+37799
+37811
+37813
+37831
+37847
+37853
+37861
+37871
+37879
+37889
+37897
+37907
+37951
+37957
+37963
+37967
+37987
+37991
+37993
+37997
+38011
+38039
+38047
+38053
+38069
+38083
+38113
+38119
+38149
+38153
+38167
+38177
+38183
+38189
+38197
+38201
+38219
+38231
+38237
+38239
+38261
+38273
+38281
+38287
+38299
+38303
+38317
+38321
+38327
+38329
+38333
+38351
+38371
+38377
+38393
+38431
+38447
+38449
+38453
+38459
+38461
+38501
+38543
+38557
+38561
+38567
+38569
+38593
+38603
+38609
+38611
+38629
+38639
+38651
+38653
+38669
+38671
+38677
+38693
+38699
+38707
+38711
+38713
+38723
+38729
+38737
+38747
+38749
+38767
+38783
+38791
+38803
+38821
+38833
+38839
+38851
+38861
+38867
+38873
+38891
+38903
+38917
+38921
+38923
+38933
+38953
+38959
+38971
+38977
+38993
+39019
+39023
+39041
+39043
+39047
+39079
+39089
+39097
+39103
+39107
+39113
+39119
+39133
+39139
+39157
+39161
+39163
+39181
+39191
+39199
+39209
+39217
+39227
+39229
+39233
+39239
+39241
+39251
+39293
+39301
+39313
+39317
+39323
+39341
+39343
+39359
+39367
+39371
+39373
+39383
+39397
+39409
+39419
+39439
+39443
+39451
+39461
+39499
+39503
+39509
+39511
+39521
+39541
+39551
+39563
+39569
+39581
+39607
+39619
+39623
+39631
+39659
+39667
+39671
+39679
+39703
+39709
+39719
+39727
+39733
+39749
+39761
+39769
+39779
+39791
+39799
+39821
+39827
+39829
+39839
+39841
+39847
+39857
+39863
+39869
+39877
+39883
+39887
+39901
+39929
+39937
+39953
+39971
+39979
+39983
+39989
+40009
+40013
+40031
+40037
+40039
+40063
+40087
+40093
+40099
+40111
+40123
+40127
+40129
+40151
+40153
+40163
+40169
+40177
+40189
+40193
+40213
+40231
+40237
+40241
+40253
+40277
+40283
+40289
+40343
+40351
+40357
+40361
+40387
+40423
+40427
+40429
+40433
+40459
+40471
+40483
+40487
+40493
+40499
+40507
+40519
+40529
+40531
+40543
+40559
+40577
+40583
+40591
+40597
+40609
+40627
+40637
+40639
+40693
+40697
+40699
+40709
+40739
+40751
+40759
+40763
+40771
+40787
+40801
+40813
+40819
+40823
+40829
+40841
+40847
+40849
+40853
+40867
+40879
+40883
+40897
+40903
+40927
+40933
+40939
+40949
+40961
+40973
+40993
+41011
+41017
+41023
+41039
+41047
+41051
+41057
+41077
+41081
+41113
+41117
+41131
+41141
+41143
+41149
+41161
+41177
+41179
+41183
+41189
+41201
+41203
+41213
+41221
+41227
+41231
+41233
+41243
+41257
+41263
+41269
+41281
+41299
+41333
+41341
+41351
+41357
+41381
+41387
+41389
+41399
+41411
+41413
+41443
+41453
+41467
+41479
+41491
+41507
+41513
+41519
+41521
+41539
+41543
+41549
+41579
+41593
+41597
+41603
+41609
+41611
+41617
+41621
+41627
+41641
+41647
+41651
+41659
+41669
+41681
+41687
+41719
+41729
+41737
+41759
+41761
+41771
+41777
+41801
+41809
+41813
+41843
+41849
+41851
+41863
+41879
+41887
+41893
+41897
+41903
+41911
+41927
+41941
+41947
+41953
+41957
+41959
+41969
+41981
+41983
+41999
+42013
+42017
+42019
+42023
+42043
+42061
+42071
+42073
+42083
+42089
+42101
+42131
+42139
+42157
+42169
+42179
+42181
+42187
+42193
+42197
+42209
+42221
+42223
+42227
+42239
+42257
+42281
+42283
+42293
+42299
+42307
+42323
+42331
+42337
+42349
+42359
+42373
+42379
+42391
+42397
+42403
+42407
+42409
+42433
+42437
+42443
+42451
+42457
+42461
+42463
+42467
+42473
+42487
+42491
+42499
+42509
+42533
+42557
+42569
+42571
+42577
+42589
+42611
+42641
+42643
+42649
+42667
+42677
+42683
+42689
+42697
+42701
+42703
+42709
+42719
+42727
+42737
+42743
+42751
+42767
+42773
+42787
+42793
+42797
+42821
+42829
+42839
+42841
+42853
+42859
+42863
+42899
+42901
+42923
+42929
+42937
+42943
+42953
+42961
+42967
+42979
+42989
+43003
+43013
+43019
+43037
+43049
+43051
+43063
+43067
+43093
+43103
+43117
+43133
+43151
+43159
+43177
+43189
+43201
+43207
+43223
+43237
+43261
+43271
+43283
+43291
+43313
+43319
+43321
+43331
+43391
+43397
+43399
+43403
+43411
+43427
+43441
+43451
+43457
+43481
+43487
+43499
+43517
+43541
+43543
+43573
+43577
+43579
+43591
+43597
+43607
+43609
+43613
+43627
+43633
+43649
+43651
+43661
+43669
+43691
+43711
+43717
+43721
+43753
+43759
+43777
+43781
+43783
+43787
+43789
+43793
+43801
+43853
+43867
+43889
+43891
+43913
+43933
+43943
+43951
+43961
+43963
+43969
+43973
+43987
+43991
+43997
+44017
+44021
+44027
+44029
+44041
+44053
+44059
+44071
+44087
+44089
+44101
+44111
+44119
+44123
+44129
+44131
+44159
+44171
+44179
+44189
+44201
+44203
+44207
+44221
+44249
+44257
+44263
+44267
+44269
+44273
+44279
+44281
+44293
+44351
+44357
+44371
+44381
+44383
+44389
+44417
+44449
+44453
+44483
+44491
+44497
+44501
+44507
+44519
+44531
+44533
+44537
+44543
+44549
+44563
+44579
+44587
+44617
+44621
+44623
+44633
+44641
+44647
+44651
+44657
+44683
+44687
+44699
+44701
+44711
+44729
+44741
+44753
+44771
+44773
+44777
+44789
+44797
+44809
+44819
+44839
+44843
+44851
+44867
+44879
+44887
+44893
+44909
+44917
+44927
+44939
+44953
+44959
+44963
+44971
+44983
+44987
+45007
+45013
+45053
+45061
+45077
+45083
+45119
+45121
+45127
+45131
+45137
+45139
+45161
+45179
+45181
+45191
+45197
+45233
+45247
+45259
+45263
+45281
+45289
+45293
+45307
+45317
+45319
+45329
+45337
+45341
+45343
+45361
+45377
+45389
+45403
+45413
+45427
+45433
+45439
+45481
+45491
+45497
+45503
+45523
+45533
+45541
+45553
+45557
+45569
+45587
+45589
+45599
+45613
+45631
+45641
+45659
+45667
+45673
+45677
+45691
+45697
+45707
+45737
+45751
+45757
+45763
+45767
+45779
+45817
+45821
+45823
+45827
+45833
+45841
+45853
+45863
+45869
+45887
+45893
+45943
+45949
+45953
+45959
+45971
+45979
+45989
+46021
+46027
+46049
+46051
+46061
+46073
+46091
+46093
+46099
+46103
+46133
+46141
+46147
+46153
+46171
+46181
+46183
+46187
+46199
+46219
+46229
+46237
+46261
+46271
+46273
+46279
+46301
+46307
+46309
+46327
+46337
+46349
+46351
+46381
+46399
+46411
+46439
+46441
+46447
+46451
+46457
+46471
+46477
+46489
+46499
+46507
+46511
+46523
+46549
+46559
+46567
+46573
+46589
+46591
+46601
+46619
+46633
+46639
+46643
+46649
+46663
+46679
+46681
+46687
+46691
+46703
+46723
+46727
+46747
+46751
+46757
+46769
+46771
+46807
+46811
+46817
+46819
+46829
+46831
+46853
+46861
+46867
+46877
+46889
+46901
+46919
+46933
+46957
+46993
+46997
+47017
+47041
+47051
+47057
+47059
+47087
+47093
+47111
+47119
+47123
+47129
+47137
+47143
+47147
+47149
+47161
+47189
+47207
+47221
+47237
+47251
+47269
+47279
+47287
+47293
+47297
+47303
+47309
+47317
+47339
+47351
+47353
+47363
+47381
+47387
+47389
+47407
+47417
+47419
+47431
+47441
+47459
+47491
+47497
+47501
+47507
+47513
+47521
+47527
+47533
+47543
+47563
+47569
+47581
+47591
+47599
+47609
+47623
+47629
+47639
+47653
+47657
+47659
+47681
+47699
+47701
+47711
+47713
+47717
+47737
+47741
+47743
+47777
+47779
+47791
+47797
+47807
+47809
+47819
+47837
+47843
+47857
+47869
+47881
+47903
+47911
+47917
+47933
+47939
+47947
+47951
+47963
+47969
+47977
+47981
+48017
+48023
+48029
+48049
+48073
+48079
+48091
+48109
+48119
+48121
+48131
+48157
+48163
+48179
+48187
+48193
+48197
+48221
+48239
+48247
+48259
+48271
+48281
+48299
+48311
+48313
+48337
+48341
+48353
+48371
+48383
+48397
+48407
+48409
+48413
+48437
+48449
+48463
+48473
+48479
+48481
+48487
+48491
+48497
+48523
+48527
+48533
+48539
+48541
+48563
+48571
+48589
+48593
+48611
+48619
+48623
+48647
+48649
+48661
+48673
+48677
+48679
+48731
+48733
+48751
+48757
+48761
+48767
+48779
+48781
+48787
+48799
+48809
+48817
+48821
+48823
+48847
+48857
+48859
+48869
+48871
+48883
+48889
+48907
+48947
+48953
+48973
+48989
+48991
+49003
+49009
+49019
+49031
+49033
+49037
+49043
+49057
+49069
+49081
+49103
+49109
+49117
+49121
+49123
+49139
+49157
+49169
+49171
+49177
+49193
+49199
+49201
+49207
+49211
+49223
+49253
+49261
+49277
+49279
+49297
+49307
+49331
+49333
+49339
+49363
+49367
+49369
+49391
+49393
+49409
+49411
+49417
+49429
+49433
+49451
+49459
+49463
+49477
+49481
+49499
+49523
+49529
+49531
+49537
+49547
+49549
+49559
+49597
+49603
+49613
+49627
+49633
+49639
+49663
+49667
+49669
+49681
+49697
+49711
+49727
+49739
+49741
+49747
+49757
+49783
+49787
+49789
+49801
+49807
+49811
+49823
+49831
+49843
+49853
+49871
+49877
+49891
+49919
+49921
+49927
+49937
+49939
+49943
+49957
+49991
+49993
+49999
+50021
+50023
+50033
+50047
+50051
+50053
+50069
+50077
+50087
+50093
+50101
+50111
+50119
+50123
+50129
+50131
+50147
+50153
+50159
+50177
+50207
+50221
+50227
+50231
+50261
+50263
+50273
+50287
+50291
+50311
+50321
+50329
+50333
+50341
+50359
+50363
+50377
+50383
+50387
+50411
+50417
+50423
+50441
+50459
+50461
+50497
+50503
+50513
+50527
+50539
+50543
+50549
+50551
+50581
+50587
+50591
+50593
+50599
+50627
+50647
+50651
+50671
+50683
+50707
+50723
+50741
+50753
+50767
+50773
+50777
+50789
+50821
+50833
+50839
+50849
+50857
+50867
+50873
+50891
+50893
+50909
+50923
+50929
+50951
+50957
+50969
+50971
+50989
+50993
+51001
+51031
+51043
+51047
+51059
+51061
+51071
+51109
+51131
+51133
+51137
+51151
+51157
+51169
+51193
+51197
+51199
+51203
+51217
+51229
+51239
+51241
+51257
+51263
+51283
+51287
+51307
+51329
+51341
+51343
+51347
+51349
+51361
+51383
+51407
+51413
+51419
+51421
+51427
+51431
+51437
+51439
+51449
+51461
+51473
+51479
+51481
+51487
+51503
+51511
+51517
+51521
+51539
+51551
+51563
+51577
+51581
+51593
+51599
+51607
+51613
+51631
+51637
+51647
+51659
+51673
+51679
+51683
+51691
+51713
+51719
+51721
+51749
+51767
+51769
+51787
+51797
+51803
+51817
+51827
+51829
+51839
+51853
+51859
+51869
+51871
+51893
+51899
+51907
+51913
+51929
+51941
+51949
+51971
+51973
+51977
+51991
+52009
+52021
+52027
+52051
+52057
+52067
+52069
+52081
+52103
+52121
+52127
+52147
+52153
+52163
+52177
+52181
+52183
+52189
+52201
+52223
+52237
+52249
+52253
+52259
+52267
+52289
+52291
+52301
+52313
+52321
+52361
+52363
+52369
+52379
+52387
+52391
+52433
+52453
+52457
+52489
+52501
+52511
+52517
+52529
+52541
+52543
+52553
+52561
+52567
+52571
+52579
+52583
+52609
+52627
+52631
+52639
+52667
+52673
+52691
+52697
+52709
+52711
+52721
+52727
+52733
+52747
+52757
+52769
+52783
+52807
+52813
+52817
+52837
+52859
+52861
+52879
+52883
+52889
+52901
+52903
+52919
+52937
+52951
+52957
+52963
+52967
+52973
+52981
+52999
+53003
+53017
+53047
+53051
+53069
+53077
+53087
+53089
+53093
+53101
+53113
+53117
+53129
+53147
+53149
+53161
+53171
+53173
+53189
+53197
+53201
+53231
+53233
+53239
+53267
+53269
+53279
+53281
+53299
+53309
+53323
+53327
+53353
+53359
+53377
+53381
+53401
+53407
+53411
+53419
+53437
+53441
+53453
+53479
+53503
+53507
+53527
+53549
+53551
+53569
+53591
+53593
+53597
+53609
+53611
+53617
+53623
+53629
+53633
+53639
+53653
+53657
+53681
+53693
+53699
+53717
+53719
+53731
+53759
+53773
+53777
+53783
+53791
+53813
+53819
+53831
+53849
+53857
+53861
+53881
+53887
+53891
+53897
+53899
+53917
+53923
+53927
+53939
+53951
+53959
+53987
+53993
+54001
+54011
+54013
+54037
+54049
+54059
+54083
+54091
+54101
+54121
+54133
+54139
+54151
+54163
+54167
+54181
+54193
+54217
+54251
+54269
+54277
+54287
+54293
+54311
+54319
+54323
+54331
+54347
+54361
+54367
+54371
+54377
+54401
+54403
+54409
+54413
+54419
+54421
+54437
+54443
+54449
+54469
+54493
+54497
+54499
+54503
+54517
+54521
+54539
+54541
+54547
+54559
+54563
+54577
+54581
+54583
+54601
+54617
+54623
+54629
+54631
+54647
+54667
+54673
+54679
+54709
+54713
+54721
+54727
+54751
+54767
+54773
+54779
+54787
+54799
+54829
+54833
+54851
+54869
+54877
+54881
+54907
+54917
+54919
+54941
+54949
+54959
+54973
+54979
+54983
+55001
+55009
+55021
+55049
+55051
+55057
+55061
+55073
+55079
+55103
+55109
+55117
+55127
+55147
+55163
+55171
+55201
+55207
+55213
+55217
+55219
+55229
+55243
+55249
+55259
+55291
+55313
+55331
+55333
+55337
+55339
+55343
+55351
+55373
+55381
+55399
+55411
+55439
+55441
+55457
+55469
+55487
+55501
+55511
+55529
+55541
+55547
+55579
+55589
+55603
+55609
+55619
+55621
+55631
+55633
+55639
+55661
+55663
+55667
+55673
+55681
+55691
+55697
+55711
+55717
+55721
+55733
+55763
+55787
+55793
+55799
+55807
+55813
+55817
+55819
+55823
+55829
+55837
+55843
+55849
+55871
+55889
+55897
+55901
+55903
+55921
+55927
+55931
+55933
+55949
+55967
+55987
+55997
+56003
+56009
+56039
+56041
+56053
+56081
+56087
+56093
+56099
+56101
+56113
+56123
+56131
+56149
+56167
+56171
+56179
+56197
+56207
+56209
+56237
+56239
+56249
+56263
+56267
+56269
+56299
+56311
+56333
+56359
+56369
+56377
+56383
+56393
+56401
+56417
+56431
+56437
+56443
+56453
+56467
+56473
+56477
+56479
+56489
+56501
+56503
+56509
+56519
+56527
+56531
+56533
+56543
+56569
+56591
+56597
+56599
+56611
+56629
+56633
+56659
+56663
+56671
+56681
+56687
+56701
+56711
+56713
+56731
+56737
+56747
+56767
+56773
+56779
+56783
+56807
+56809
+56813
+56821
+56827
+56843
+56857
+56873
+56891
+56893
+56897
+56909
+56911
+56921
+56923
+56929
+56941
+56951
+56957
+56963
+56983
+56989
+56993
+56999
+57037
+57041
+57047
+57059
+57073
+57077
+57089
+57097
+57107
+57119
+57131
+57139
+57143
+57149
+57163
+57173
+57179
+57191
+57193
+57203
+57221
+57223
+57241
+57251
+57259
+57269
+57271
+57283
+57287
+57301
+57329
+57331
+57347
+57349
+57367
+57373
+57383
+57389
+57397
+57413
+57427
+57457
+57467
+57487
+57493
+57503
+57527
+57529
+57557
+57559
+57571
+57587
+57593
+57601
+57637
+57641
+57649
+57653
+57667
+57679
+57689
+57697
+57709
+57713
+57719
+57727
+57731
+57737
+57751
+57773
+57781
+57787
+57791
+57793
+57803
+57809
+57829
+57839
+57847
+57853
+57859
+57881
+57899
+57901
+57917
+57923
+57943
+57947
+57973
+57977
+57991
+58013
+58027
+58031
+58043
+58049
+58057
+58061
+58067
+58073
+58099
+58109
+58111
+58129
+58147
+58151
+58153
+58169
+58171
+58189
+58193
+58199
+58207
+58211
+58217
+58229
+58231
+58237
+58243
+58271
+58309
+58313
+58321
+58337
+58363
+58367
+58369
+58379
+58391
+58393
+58403
+58411
+58417
+58427
+58439
+58441
+58451
+58453
+58477
+58481
+58511
+58537
+58543
+58549
+58567
+58573
+58579
+58601
+58603
+58613
+58631
+58657
+58661
+58679
+58687
+58693
+58699
+58711
+58727
+58733
+58741
+58757
+58763
+58771
+58787
+58789
+58831
+58889
+58897
+58901
+58907
+58909
+58913
+58921
+58937
+58943
+58963
+58967
+58979
+58991
+58997
+59009
+59011
+59021
+59023
+59029
+59051
+59053
+59063
+59069
+59077
+59083
+59093
+59107
+59113
+59119
+59123
+59141
+59149
+59159
+59167
+59183
+59197
+59207
+59209
+59219
+59221
+59233
+59239
+59243
+59263
+59273
+59281
+59333
+59341
+59351
+59357
+59359
+59369
+59377
+59387
+59393
+59399
+59407
+59417
+59419
+59441
+59443
+59447
+59453
+59467
+59471
+59473
+59497
+59509
+59513
+59539
+59557
+59561
+59567
+59581
+59611
+59617
+59621
+59627
+59629
+59651
+59659
+59663
+59669
+59671
+59693
+59699
+59707
+59723
+59729
+59743
+59747
+59753
+59771
+59779
+59791
+59797
+59809
+59833
+59863
+59879
+59887
+59921
+59929
+59951
+59957
+59971
+59981
+59999
+60013
+60017
+60029
+60037
+60041
+60077
+60083
+60089
+60091
+60101
+60103
+60107
+60127
+60133
+60139
+60149
+60161
+60167
+60169
+60209
+60217
+60223
+60251
+60257
+60259
+60271
+60289
+60293
+60317
+60331
+60337
+60343
+60353
+60373
+60383
+60397
+60413
+60427
+60443
+60449
+60457
+60493
+60497
+60509
+60521
+60527
+60539
+60589
+60601
+60607
+60611
+60617
+60623
+60631
+60637
+60647
+60649
+60659
+60661
+60679
+60689
+60703
+60719
+60727
+60733
+60737
+60757
+60761
+60763
+60773
+60779
+60793
+60811
+60821
+60859
+60869
+60887
+60889
+60899
+60901
+60913
+60917
+60919
+60923
+60937
+60943
+60953
+60961
+61001
+61007
+61027
+61031
+61043
+61051
+61057
+61091
+61099
+61121
+61129
+61141
+61151
+61153
+61169
+61211
+61223
+61231
+61253
+61261
+61283
+61291
+61297
+61331
+61333
+61339
+61343
+61357
+61363
+61379
+61381
+61403
+61409
+61417
+61441
+61463
+61469
+61471
+61483
+61487
+61493
+61507
+61511
+61519
+61543
+61547
+61553
+61559
+61561
+61583
+61603
+61609
+61613
+61627
+61631
+61637
+61643
+61651
+61657
+61667
+61673
+61681
+61687
+61703
+61717
+61723
+61729
+61751
+61757
+61781
+61813
+61819
+61837
+61843
+61861
+61871
+61879
+61909
+61927
+61933
+61949
+61961
+61967
+61979
+61981
+61987
+61991
+62003
+62011
+62017
+62039
+62047
+62053
+62057
+62071
+62081
+62099
+62119
+62129
+62131
+62137
+62141
+62143
+62171
+62189
+62191
+62201
+62207
+62213
+62219
+62233
+62273
+62297
+62299
+62303
+62311
+62323
+62327
+62347
+62351
+62383
+62401
+62417
+62423
+62459
+62467
+62473
+62477
+62483
+62497
+62501
+62507
+62533
+62539
+62549
+62563
+62581
+62591
+62597
+62603
+62617
+62627
+62633
+62639
+62653
+62659
+62683
+62687
+62701
+62723
+62731
+62743
+62753
+62761
+62773
+62791
+62801
+62819
+62827
+62851
+62861
+62869
+62873
+62897
+62903
+62921
+62927
+62929
+62939
+62969
+62971
+62981
+62983
+62987
+62989
+63029
+63031
+63059
+63067
+63073
+63079
+63097
+63103
+63113
+63127
+63131
+63149
+63179
+63197
+63199
+63211
+63241
+63247
+63277
+63281
+63299
+63311
+63313
+63317
+63331
+63337
+63347
+63353
+63361
+63367
+63377
+63389
+63391
+63397
+63409
+63419
+63421
+63439
+63443
+63463
+63467
+63473
+63487
+63493
+63499
+63521
+63527
+63533
+63541
+63559
+63577
+63587
+63589
+63599
+63601
+63607
+63611
+63617
+63629
+63647
+63649
+63659
+63667
+63671
+63689
+63691
+63697
+63703
+63709
+63719
+63727
+63737
+63743
+63761
+63773
+63781
+63793
+63799
+63803
+63809
+63823
+63839
+63841
+63853
+63857
+63863
+63901
+63907
+63913
+63929
+63949
+63977
+63997
+64007
+64013
+64019
+64033
+64037
+64063
+64067
+64081
+64091
+64109
+64123
+64151
+64153
+64157
+64171
+64187
+64189
+64217
+64223
+64231
+64237
+64271
+64279
+64283
+64301
+64303
+64319
+64327
+64333
+64373
+64381
+64399
+64403
+64433
+64439
+64451
+64453
+64483
+64489
+64499
+64513
+64553
+64567
+64577
+64579
+64591
+64601
+64609
+64613
+64621
+64627
+64633
+64661
+64663
+64667
+64679
+64693
+64709
+64717
+64747
+64763
+64781
+64783
+64793
+64811
+64817
+64849
+64853
+64871
+64877
+64879
+64891
+64901
+64919
+64921
+64927
+64937
+64951
+64969
+64997
+65003
+65011
+65027
+65029
+65033
+65053
+65063
+65071
+65089
+65099
+65101
+65111
+65119
+65123
+65129
+65141
+65147
+65167
+65171
+65173
+65179
+65183
+65203
+65213
+65239
+65257
+65267
+65269
+65287
+65293
+65309
+65323
+65327
+65353
+65357
+65371
+65381
+65393
+65407
+65413
+65419
+65423
+65437
+65447
+65449
+65479
+65497
+65519
+65521
diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod
new file mode 100644
index 0000000000..6da4d4a9c4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prng.pod
@@ -0,0 +1,38 @@
+=head1 NAME
+
+ prng - pseudo-random number generator
+
+=head1 SYNOPSIS
+
+ prng [count]
+
+=head1 DESCRIPTION
+
+B<Prng> generates 32-bit pseudo-random integers using the
+Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using
+the standard C library's rand() function, which is itself seeded from the
+system clock and the process ID number. Thus, the values generated
+are not particularly useful for cryptographic applications, but they
+are in general much better than the typical output of the usual
+multiplicative congruential generator used by most runtime libraries.
+
+You may optionally specify how many random values should be generated
+by giving a I<count> argument on the command line. If you do not
+specify a count, only one random value will be generated. The results
+are output to the standard output in decimal notation, one value per
+line.
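+
+For illustration, the heart of a BBS generator can be sketched in a
+few lines of C. The toy modulus below (a product of two primes
+congruent to 3 mod 4) and the fixed seed are illustrative values
+only; B<prng> itself uses much larger, multiple-precision numbers:
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    int main(void)
+    {
+        uint64_t p = 10007, q = 10039;    /* both are 3 mod 4   */
+        uint64_t n = p * q;
+        uint64_t x = 2846571;             /* seed, coprime to n */
+        for (int i = 0; i < 4; i++) {
+            uint32_t r = 0;
+            for (int b = 0; b < 32; b++) {
+                x = (x * x) % n;          /* quadratic residue  */
+                r = (r << 1) | (uint32_t)(x & 1);
+            }
+            printf("%u\n", r);            /* one 32-bit value   */
+        }
+        return 0;
+    }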
+
+=head1 RESTRICTIONS
+
+As stated above, B<prng> uses the C library's rand() function to seed
+the generator, so it is not terribly suitable for cryptographic
+applications. Also note that each time you run the program, a new
+seed is generated, so it is better to run it once with a I<count>
+parameter than it is to run it multiple times to generate several
+values.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
+ Thayer School of Engineering, Dartmouth College, Hanover, NH USA
diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt
new file mode 100644
index 0000000000..0df0f0390a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/redux.txt
@@ -0,0 +1,86 @@
+Modular Reduction
+
+Usually, modular reduction is accomplished by long division, using the
+mp_div() or mp_mod() functions. However, when performing modular
+exponentiation, you spend a lot of time reducing by the same modulus
+again and again. For this purpose, doing a full division for each
+multiplication is quite inefficient.
+
+For this reason, the mp_exptmod() function does not perform modular
+reductions in the usual way, but instead takes advantage of an
+algorithm due to Barrett, as described by Menezes, van Oorschot, and
+Vanstone in their book _Handbook of Applied Cryptography_, published
+by the CRC Press (see Chapter 14 for details). This method reduces
+most of the computation of reduction to efficient shifting and masking
+operations, and avoids the multiple-precision division entirely.
+
+Here is a brief synopsis of Barrett reduction, as it is implemented in
+this library.
+
+Let b denote the radix of the computation (one more than the maximum
+value that can be denoted by an mp_digit). Let m be the modulus, and
+let k be the number of significant digits of m. Let x be the value to
+be reduced modulo m. By the Division Theorem, there exist unique
+integers Q and R such that:
+
+ x = Qm + R, 0 <= R < m
+
+Barrett reduction takes advantage of the fact that you can easily
+approximate Q to within two, given a value M such that:
+
+  M = floor( b^(2k) / m )
+
+Computation of M requires a full-precision division step, so if you
+are only doing a single reduction by m, you gain no advantage.
+However, when multiple reductions by the same m are required, this
+division need only be done once, beforehand. With M in hand, we can
+use the following equation to compute Q', an approximation of Q:
+
+  Q' = floor( floor( x / b^(k-1) ) * M / b^(k+1) )
+
+The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
+efficiently implemented with shifts and masks, leaving only a single
+multiplication to be performed to get this approximation. It can be
+shown that Q - 2 <= Q' <= Q, so in the worst case, we can get by with
+two additional subtractions to bring the value into line with the
+actual value of Q.
+
+Once we've got Q', we basically multiply that by m and subtract from
+x, yielding:
+
+ x - Q'm = Qm + R - Q'm
+
+Since we know the constraint on Q', this is one of:
+
+ R
+ m + R
+ 2m + R
+
+Since R < m by the Division Theorem, we can simply subtract off m
+until we get a value in the correct range, which will happen with no
+more than 2 subtractions:
+
+  v = x - Q'm
+
+  while(v >= m)
+    v = v - m
+  endwhile
+
+
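+The whole procedure is easy to see with machine integers. The sketch
+below uses radix b = 2, so the digit shifts become bit shifts; the
+modulus, k, and input are illustrative values, and the real code of
+course performs these steps on multiple-precision digits:
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    int main(void)
+    {
+        uint64_t m = 61837;                         /* modulus, m < 2^k */
+        int      k = 16;
+        uint64_t M = ((uint64_t)1 << (2 * k)) / m;  /* computed once    */
+
+        uint64_t x = 123456789;                     /* x < b^(2k)       */
+        uint64_t q = ((x >> (k - 1)) * M) >> (k + 1);   /* this is Q'   */
+        uint64_t v = x - q * m;
+        while (v >= m)                              /* at most twice    */
+            v -= m;
+        /* both of these print the same residue, 30137 */
+        printf("%llu %llu\n", (unsigned long long)v,
+               (unsigned long long)(x % m));
+        return 0;
+    }
+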
+In random performance trials, modular exponentiation using this method
+of reduction gave around a 40% speedup over using the division for
+reduction.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt
new file mode 100644
index 0000000000..4529cbfc46
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt
@@ -0,0 +1,50 @@
+Square Root
+
+A simple iterative algorithm is used to compute the greatest integer
+less than or equal to the square root. Essentially, this is Newton's
+linear approximation, computed by finding successive values of the
+equation:
+
+  x[k+1] = x[k] - (x[k]^2 - V) / (2 * x[k])
+
+...where V is the value for which the square root is being sought. In
+essence, what is happening here is that we guess a value for the
+square root, then figure out how far off we were by squaring our guess
+and subtracting the target. Using this value, we compute a linear
+approximation for the error, and adjust the "guess". We keep doing
+this until the precision gets low enough that the above equation
+yields a quotient of zero. At this point, our last guess is one
+greater than the square root we're seeking.
+
+The initial guess is computed by dividing V by 4, which is a heuristic
+I have found to be fairly good on average. This also has the
+advantage of being very easy to compute efficiently, even for large
+values.
+
+So, the resulting algorithm works as follows:
+
+  x = V / 4               /* compute initial guess */
+
+  loop
+    t = (x * x) - V       /* Compute absolute error */
+    u = 2 * x             /* Adjust by tangent slope */
+    t = t / u
+
+    /* Loop is done if error is zero */
+    if(t == 0)
+      break
+
+    /* Adjust guess by error term */
+    x = x - t
+  end
+
+  x = x - 1
+
+The result of the computation is the value of x.
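+
+Here is the same procedure rendered as a small C sketch, with a
+64-bit integer standing in for the multiple-precision value V. The
+chosen V is illustrative, and the V / 4 starting guess assumes V is
+larger than about 16:
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    int main(void)
+    {
+        uint64_t V = 1000000007;
+        uint64_t x = V / 4;                   /* initial guess */
+        for (;;) {
+            uint64_t t = (x * x - V) / (2 * x);   /* error term */
+            if (t == 0)
+                break;
+            x -= t;                           /* adjust the guess */
+        }
+        x -= 1;
+        /* prints 31622, the integer square root of V */
+        printf("%llu\n", (unsigned long long)x);
+        return 0;
+    }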
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt
new file mode 100644
index 0000000000..edbb97882c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/square.txt
@@ -0,0 +1,72 @@
+Squaring Algorithm
+
+When you are squaring a value, you can take advantage of the fact that
+half the multiplications performed by the more general multiplication
+algorithm (see 'mul.txt' for a description) are redundant when the
+multiplicand equals the multiplier.
+
+In particular, the modified algorithm is:
+
+k = 0
+for j <- 0 to (#a - 1)
+  w = c[2*j] + (a[j] ^ 2)
+  c[2*j] = w mod R
+  k = w div R
+
+  for i <- j+1 to (#a - 1)
+    w = (2 * a[j] * a[i]) + k + c[i+j]
+    c[i+j] = w mod R
+    k = w div R
+  endfor
+  c[i+j] = k
+  k = 0
+endfor
+
+On the surface, this looks identical to the multiplication algorithm;
+however, note the following differences:
+
+ - precomputation of the leading term in the outer loop
+
+ - i runs from j+1 instead of from zero
+
+ - doubling of a[i] * a[j] in the inner product
+
+Unfortunately, the construction of the inner product is such that we
+need more than two digits to represent the inner product, in some
+cases. In a C implementation, this means that some gymnastics must be
+performed in order to handle overflow, for which C has no direct
+abstraction. We do this by observing the following:
+
+If we have multiplied a[i] and a[j], and the product is more than half
+the maximum value expressible in two digits, then doubling this result
+will overflow into a third digit. If this occurs, we take note of the
+overflow, and double it anyway -- C integer arithmetic ignores
+overflow, so the two digits we get back should still be valid, modulo
+the overflow.
+
+Having doubled this value, we now have to add in the remainders and
+the digits already computed by earlier steps. If we did not overflow
+in the previous step, we might still cause an overflow here. That
+will happen whenever the maximum value expressible in two digits, less
+the amount we have to add, is greater than the result of the previous
+step. Thus, the overflow computation is:
+
+  u = 0
+  w = a[i] * a[j]
+
+  if(w > (R * R - 1) / 2)
+    u = 1
+
+  w = w * 2
+  v = c[i + j] + k
+
+  if(u == 0 && (R * R - 1 - v) < w)
+    u = 1
+
+If there is an overflow, u will be 1, otherwise u will be 0. The rest
+of the parameters are the same as they are in the above description.
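+
+For reference, here is a small C rendering of the loop using 8-bit
+digits (R = 256) and a 32-bit accumulator. Because the accumulator
+is wider than two digits, the doubling cannot overflow and the u
+flag above is not needed; note also that the final carry k can
+occupy two digits, so it is folded into the result rather than
+stored directly:
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    /* square the n-digit little-endian value a into c (2n digits) */
+    static void sqr_digits(const uint8_t *a, int n, uint8_t *c)
+    {
+        int i, j;
+        for (i = 0; i < 2 * n; i++)
+            c[i] = 0;
+        for (j = 0; j < n; j++) {
+            uint32_t w = c[2 * j] + (uint32_t)a[j] * a[j];
+            c[2 * j] = (uint8_t)(w % 256);
+            uint32_t k = w / 256;
+            for (i = j + 1; i < n; i++) {
+                w = 2 * (uint32_t)a[j] * a[i] + k + c[i + j];
+                c[i + j] = (uint8_t)(w % 256);
+                k = w / 256;
+            }
+            w = c[i + j] + k;        /* k may exceed one digit ... */
+            c[i + j] = (uint8_t)(w % 256);
+            if (w >= 256)            /* ... so propagate the rest  */
+                c[i + j + 1] = (uint8_t)(w / 256);
+        }
+    }
+
+    int main(void)
+    {
+        uint8_t a[2] = { 0x34, 0x12 }, c[4];   /* 0x1234 = 4660 */
+        sqr_digits(a, 2, c);
+        /* prints 014B5A90, which is 4660^2 = 21715600 */
+        printf("%02X%02X%02X%02X\n", c[3], c[2], c[1], c[0]);
+        return 0;
+    }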
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt
new file mode 100644
index 0000000000..58f37c9dff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/timing.txt
@@ -0,0 +1,213 @@
+MPI Library Timing Tests
+
+Hardware/OS
+(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
+(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
+(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
+(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
+(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
+(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
+
+Compiler
+(1) MIPSpro C 7.2.1 -O3 optimizations
+(2) GCC 2.95.1 -O3 optimizations
+(3) IBM AIX xlc -O3 optimizations (version unknown)
+(4) EGCS 2.91.66 -O3 optimizations
+(5) Metrowerks CodeWarrior 5.0 C, all optimizations
+(6) MIPSpro C 7.30 -O3 optimizations
+(7) same as (6), with optimized libmalloc.so
+
+Timings are given in seconds, computed using the C library's clock()
+function. The first column gives the hardware and compiler
+configuration used for the test. The second column indicates the
+number of tests that were aggregated to get the statistics for that
+size. These were compiled using 16-bit digits.
+
+Source data were generated randomly using a fixed seed, so they should
+be internally consistent, but may vary on different systems depending
+on the C library. Also, since the resolution of the timer accessed by
+clock() varies, there may be some variance in the precision of these
+measurements.
+
+Prime Generation (primegen)
+
+128 bits:
+A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46
+A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55
+B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29
+C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14
+D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70
+A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48
+A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07
+
+192 bits:
+A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96
+A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55
+B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97
+C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24
+D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63
+A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84
+A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88
+
+256 bits:
+A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79
+A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11
+B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35
+C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91
+D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00
+A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46
+A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60
+
+320 bits:
+A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81
+A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03
+B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80
+C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59
+D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73
+A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01
+A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78
+
+384 bits:
+A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89
+A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14
+B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78
+C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13
+D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81
+A6 200 min=0.11, avg=1.73, max=7.36, sum=345.55
+A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02
+
+448 bits:
+A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63
+A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86
+B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86
+C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36
+D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17
+A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58
+A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16
+
+512 bits:
+A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35
+A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18
+B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45
+C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22
+D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11
+A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83
+A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02
+
+Modular Exponentiation (metime)
+
+The following results are aggregated from 200 pseudo-randomly
+generated tests, based on a fixed seed.
+
+ base, exponent, and modulus size (bits)
+P/C 128 192 256 320 384 448 512 640 768 896 1024
+------- -----------------------------------------------------------------
+A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
+A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
+B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
+C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
+D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
+E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
+A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
+A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
+
+Multiplication and Squaring tests, (mulsqr)
+
+The following results are aggregated from 500000 pseudo-randomly
+generated tests, based on a per-run wall-clock seed. Times are given
+in seconds, except where indicated in microseconds (us).
+
+(A1)
+
+bits multiply square adv percent time/mult time/square
+64 9.33 9.15 > 1.9 18.7us 18.3us
+128 10.88 10.44 > 4.0 21.8us 20.9us
+192 13.30 11.89 > 10.6 26.7us 23.8us
+256 14.88 12.64 > 15.1 29.8us 25.3us
+320 18.64 15.01 > 19.5 37.3us 30.0us
+384 23.11 17.70 > 23.4 46.2us 35.4us
+448 28.28 20.88 > 26.2 56.6us 41.8us
+512 34.09 24.51 > 28.1 68.2us 49.0us
+640 47.86 33.25 > 30.5 95.7us 66.5us
+768 64.91 43.54 > 32.9 129.8us 87.1us
+896 84.49 55.48 > 34.3 169.0us 111.0us
+1024 107.25 69.21 > 35.5 214.5us 138.4us
+1536 227.97 141.91 > 37.8 456.0us 283.8us
+2048 394.05 242.15 > 38.5 788.1us 484.3us
+
+(A2)
+
+bits multiply square ad percent time/mult time/square
+64 7.87 7.95 < 1.0 15.7us 15.9us
+128 9.40 9.19 > 2.2 18.8us 18.4us
+192 11.15 10.59 > 5.0 22.3us 21.2us
+256 12.02 11.16 > 7.2 24.0us 22.3us
+320 14.62 13.43 > 8.1 29.2us 26.9us
+384 17.72 15.80 > 10.8 35.4us 31.6us
+448 21.24 18.51 > 12.9 42.5us 37.0us
+512 25.36 21.78 > 14.1 50.7us 43.6us
+640 34.57 29.00 > 16.1 69.1us 58.0us
+768 46.10 37.60 > 18.4 92.2us 75.2us
+896 58.94 47.72 > 19.0 117.9us 95.4us
+1024 73.76 59.12 > 19.8 147.5us 118.2us
+1536 152.00 118.80 > 21.8 304.0us 237.6us
+2048 259.41 199.57 > 23.1 518.8us 399.1us
+
+(B3)
+
+bits multiply square ad percent time/mult time/square
+64 2.60 2.47 > 5.0 5.20us 4.94us
+128 4.43 4.06 > 8.4 8.86us 8.12us
+192 7.03 6.10 > 13.2 14.1us 12.2us
+256 10.44 8.59 > 17.7 20.9us 17.2us
+320 14.44 11.64 > 19.4 28.9us 23.3us
+384 19.12 15.08 > 21.1 38.2us 30.2us
+448 24.55 19.09 > 22.2 49.1us 38.2us
+512 31.03 23.53 > 24.2 62.1us 47.1us
+640 45.05 33.80 > 25.0 90.1us 67.6us
+768 63.02 46.05 > 26.9 126.0us 92.1us
+896 83.74 60.29 > 28.0 167.5us 120.6us
+1024 106.73 76.65 > 28.2 213.5us 153.3us
+1536 228.94 160.98 > 29.7 457.9us 322.0us
+2048 398.08 275.93 > 30.7 796.2us 551.9us
+
+(C4)
+
+bits multiply square ad percent time/mult time/square
+64 1.34 1.28 > 4.5 2.68us 2.56us
+128 2.76 2.59 > 6.2 5.52us 5.18us
+192 4.52 4.16 > 8.0 9.04us 8.32us
+256 6.64 5.99 > 9.8 13.3us 12.0us
+320 9.20 8.13 > 11.6 18.4us 16.3us
+384 12.01 10.58 > 11.9 24.0us 21.2us
+448 15.24 13.33 > 12.5 30.5us 26.7us
+512 19.02 16.46 > 13.5 38.0us 32.9us
+640 27.56 23.54 > 14.6 55.1us 47.1us
+768 37.89 31.78 > 16.1 75.8us 63.6us
+896 49.24 41.42 > 15.9 98.5us 82.8us
+1024 62.59 52.18 > 16.6 125.2us 104.3us
+1536 131.66 107.72 > 18.2 263.3us 215.4us
+2048 226.45 182.95 > 19.2 453.0us 365.9us
+
+(A7)
+
+bits multiply square ad percent time/mult time/square
+64 1.74 1.71 > 1.7 3.48us 3.42us
+128 3.48 2.96 > 14.9 6.96us 5.92us
+192 5.74 4.60 > 19.9 11.5us 9.20us
+256 8.75 6.61 > 24.5 17.5us 13.2us
+320 12.5 8.99 > 28.1 25.0us 18.0us
+384 16.9 11.9 > 29.6 33.8us 23.8us
+448 22.2 15.2 > 31.7 44.4us 30.4us
+512 28.3 19.0 > 32.7 56.6us 38.0us
+640 42.4 28.0 > 34.0 84.8us 56.0us
+768 59.4 38.5 > 35.2 118.8us 77.0us
+896 79.5 51.2 > 35.6 159.0us 102.4us
+1024 102.6 65.5 > 36.2 205.2us 131.0us
+1536 224.3 140.6 > 37.3 448.6us 281.2us
+2048 393.4 244.3 > 37.9 786.8us 488.6us
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s
new file mode 100644
index 0000000000..ae9da630d1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hpma512.s
@@ -0,0 +1,615 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ *
+ * This PA-RISC 2.0 function computes the product of two unsigned integers,
+ * and adds the result to a previously computed integer. The multiplicand
+ * is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
+ * memory in little-double-wordian order. The multiplier is an unsigned
+ * 64-bit integer. The previously computed integer to which the product is
+ * added is located in the result ("res") area, and is assumed to be a
+ * 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
+ * in little-double-wordian order. This value normally will be the result
+ * of a previously computed nine doubleword result. It is not necessary
+ * to pad the multiplicand with an additional 64-bit zero doubleword.
+ *
+ * Multiplicand, multiplier, and addend ideally should be aligned at
+ * 16-byte boundaries for best performance. The code will function
+ * correctly for alignment at eight-byte boundaries which are not 16-byte
+ * boundaries, but the execution may be slightly slower due to even/odd
+ * bank conflicts on PA-RISC 8000 processors.
+ *
+ * This function is designed to accept the same calling sequence as Bill
+ * Ackerman's "maxpy_little" function. The carry from the ninth doubleword
+ * of the result is written to the tenth doubleword of the result, as is done by
+ * Bill Ackerman's function. The final carry also is returned as an
+ * integer, which may be ignored. The function prototype may be either
+ * of the following:
+ *
+ * void multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ * or
+ * int multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ *
+ * where: "l" originally denoted vector lengths. This parameter is
+ * ignored. This function always assumes a multiplicand length of
+ * 512 bits (eight doublewords), and addend and result lengths of
+ * 576 bits (nine doublewords).
+ *
+ * "m" is a pointer to the doubleword multiplier, ideally aligned
+ * on a 16-byte boundary.
+ *
+ * "a" is a pointer to the eight-doubleword multiplicand, stored
+ * in little-double-wordian order, and ideally aligned on a 16-byte
+ * boundary.
+ *
+ * "res" is a pointer to the nine doubleword addend, and to the
+ * nine-doubleword product computed by this function. The result
+ * also is stored in little-double-wordian order, and ideally is
+ * aligned on a 16-byte boundary. It is expected that the alignment
+ * of the "res" area may alternate between even/odd doubleword
+ * boundaries for successive calls for 512-bit x 512-bit
+ * multiplications.
+ *
+ * The code for this function has been scheduled to use the parallelism
+ * of the PA-RISC 8000 series microprocessors as well as the author was
+ * able. Comments and/or suggestions for improvement are welcomed.
+ *
+ * The code is "64-bit safe". This means it may be called in either
+ * the 32ILP context or the 64LP context. All 64-bits of registers are
+ * saved and restored.
+ *
+ * This code is self-contained. It requires no other header files in order
+ * to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic
+ * definitions for registers and stack offsets are included within this
+ * one source file.
+ *
+ * This is a leaf routine. As such, minimal use is made of the stack area.
+ * Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
+ * general registers, and 128 bytes are used to move intermediate products
+ * from the floating-point registers to the general registers. Stack
+ * protocols assure proper alignment of these areas.
+ *
+ */
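+
+/*
+ * For reference, a minimal C sketch of what this routine computes (not
+ * part of the build; it assumes "chunk" is an unsigned 64-bit type and
+ * uses a compiler-supported 128-bit intermediate, where the assembly
+ * below instead builds the products from 32x32-bit FP multiplies):
+ *
+ *   int multacc512(int l, chunk *m, const chunk *a, chunk *res)
+ *   {
+ *       unsigned __int128 t = 0;
+ *       int i;
+ *       for (i = 0; i < 8; i++) {      // eight multiplicand doublewords
+ *           t += (unsigned __int128)a[i] * m[0] + res[i];
+ *           res[i] = (chunk)t;         // keep the low 64 bits
+ *           t >>= 64;                  // carry into the next doubleword
+ *       }
+ *       t += res[8];                   // ninth addend doubleword
+ *       res[8] = (chunk)t;
+ *       return (int)(t >> 64);         // final carry, which the code
+ *   }                                  // below also stores/propagates
+ */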
+
+
+/* ====================================================================*/
+/* symbolic definitions for PA-RISC registers */
+/* in the MIPS style, avoids lots of case shifts */
+/* assignments (except t4) preserve register number parity */
+/* ====================================================================*/
+
+#define zero %r0 /* permanent zero */
+#define t5 %r1 /* temp register, altered by addil */
+
+#define rp %r2 /* return pointer */
+
+#define s1 %r3 /* callee saves register*/
+#define s0 %r4 /* callee saves register*/
+#define s3 %r5 /* callee saves register*/
+#define s2 %r6 /* callee saves register*/
+#define s5 %r7 /* callee saves register*/
+#define s4 %r8 /* callee saves register*/
+#define s7 %r9 /* callee saves register*/
+#define s6 %r10 /* callee saves register*/
+
+#define t1 %r19 /* caller saves register*/
+#define t0 %r20 /* caller saves register*/
+#define t3 %r21 /* caller saves register*/
+#define t2 %r22 /* caller saves register*/
+
+#define a3 %r23 /* fourth argument register, high word */
+#define a2 %r24 /* third argument register, low word*/
+#define a1 %r25 /* second argument register, high word*/
+#define a0 %r26 /* first argument register, low word*/
+
+#define v0 %r28 /* high order return value*/
+#define v1 %r29 /* low order return value*/
+
+#define sp %r30 /* stack pointer*/
+#define t4 %r31 /* temporary register */
+
+#define fa0 %fr4 /* first argument register*/
+#define fa1 %fr5 /* second argument register*/
+#define fa2 %fr6 /* third argument register*/
+#define fa3 %fr7 /* fourth argument register*/
+
+#define fa0r %fr4R /* first argument register*/
+#define fa1r %fr5R /* second argument register*/
+#define fa2r %fr6R /* third argument register*/
+#define fa3r %fr7R /* fourth argument register*/
+
+#define ft0 %fr8 /* caller saves register*/
+#define ft1 %fr9 /* caller saves register*/
+#define ft2 %fr10 /* caller saves register*/
+#define ft3 %fr11 /* caller saves register*/
+
+#define ft0r %fr8R /* caller saves register*/
+#define ft1r %fr9R /* caller saves register*/
+#define ft2r %fr10R /* caller saves register*/
+#define ft3r %fr11R /* caller saves register*/
+
+#define ft4 %fr22 /* caller saves register*/
+#define ft5 %fr23 /* caller saves register*/
+#define ft6 %fr24 /* caller saves register*/
+#define ft7 %fr25 /* caller saves register*/
+#define ft8 %fr26 /* caller saves register*/
+#define ft9 %fr27 /* caller saves register*/
+#define ft10 %fr28 /* caller saves register*/
+#define ft11 %fr29 /* caller saves register*/
+#define ft12 %fr30 /* caller saves register*/
+#define ft13 %fr31 /* caller saves register*/
+
+#define ft4r %fr22R /* caller saves register*/
+#define ft5r %fr23R /* caller saves register*/
+#define ft6r %fr24R /* caller saves register*/
+#define ft7r %fr25R /* caller saves register*/
+#define ft8r %fr26R /* caller saves register*/
+#define ft9r %fr27R /* caller saves register*/
+#define ft10r %fr28R /* caller saves register*/
+#define ft11r %fr29R /* caller saves register*/
+#define ft12r %fr30R /* caller saves register*/
+#define ft13r %fr31R /* caller saves register*/
+
+
+
+/* ================================================================== */
+/* functional definitions for PA-RISC registers */
+/* ================================================================== */
+
+/* general registers */
+
+#define T1 a0 /* temp, (length parameter ignored) */
+
+#define pM a1 /* -> 64-bit multiplier */
+#define T2 a1 /* temp, (after fetching multiplier) */
+
+#define pA a2 /* -> multiplicand vector (8 64-bit words) */
+#define T3 a2 /* temp, (after fetching multiplicand) */
+
+#define pR a3 /* -> addend vector (9 64-bit doublewords),
+ result vector (9 64-bit doublewords) */
+
+#define S0 s0 /* callee saves summand registers */
+#define S1 s1
+#define S2 s2
+#define S3 s3
+#define S4 s4
+#define S5 s5
+#define S6 s6
+#define S7 s7
+
+#define S8 v0 /* caller saves summand registers */
+#define S9 v1
+#define S10 t0
+#define S11 t1
+#define S12 t2
+#define S13 t3
+#define S14 t4
+#define S15 t5
+
+
+
+/* floating-point registers */
+
+#define M fa0 /* multiplier double word */
+#define MR fa0r /* low order half of multiplier double word */
+#define ML fa0 /* high order half of multiplier double word */
+
+#define A0 fa2 /* multiplicand double word 0 */
+#define A0R fa2r /* low order half of multiplicand double word */
+#define A0L fa2 /* high order half of multiplicand double word */
+
+#define A1 fa3 /* multiplicand double word 1 */
+#define A1R fa3r /* low order half of multiplicand double word */
+#define A1L fa3 /* high order half of multiplicand double word */
+
+#define A2 ft0 /* multiplicand double word 2 */
+#define A2R ft0r /* low order half of multiplicand double word */
+#define A2L ft0 /* high order half of multiplicand double word */
+
+#define A3 ft1 /* multiplicand double word 3 */
+#define A3R ft1r /* low order half of multiplicand double word */
+#define A3L ft1 /* high order half of multiplicand double word */
+
+#define A4 ft2 /* multiplicand double word 4 */
+#define A4R ft2r /* low order half of multiplicand double word */
+#define A4L ft2 /* high order half of multiplicand double word */
+
+#define A5 ft3 /* multiplicand double word 5 */
+#define A5R ft3r /* low order half of multiplicand double word */
+#define A5L ft3 /* high order half of multiplicand double word */
+
+#define A6 ft4 /* multiplicand double word 6 */
+#define A6R ft4r /* low order half of multiplicand double word */
+#define A6L ft4 /* high order half of multiplicand double word */
+
+#define A7 ft5 /* multiplicand double word 7 */
+#define A7R ft5r /* low order half of multiplicand double word */
+#define A7L ft5 /* high order half of multiplicand double word */
+
+#define P0 ft6 /* product word 0 */
+#define P1 ft7 /* product word 1 */
+#define P2 ft8 /* product word 2 */
+#define P3 ft9 /* product word 3 */
+#define P4 ft10 /* product word 4 */
+#define P5 ft11 /* product word 5 */
+#define P6 ft12 /* product word 6 */
+#define P7 ft13 /* product word 7 */
+
+
+
+
+/* ====================================================================== */
+/* symbolic definitions for HP-UX stack offsets */
+/* symbolic definitions for memory NOPs */
+/* ====================================================================== */
+
+#define ST_SZ 192 /* stack area total size */
+
+#define SV0 -192(sp) /* general register save area */
+#define SV1 -184(sp)
+#define SV2 -176(sp)
+#define SV3 -168(sp)
+#define SV4 -160(sp)
+#define SV5 -152(sp)
+#define SV6 -144(sp)
+#define SV7 -136(sp)
+
+#define XF0 -128(sp) /* data transfer area */
+#define XF1 -120(sp) /* for floating-pt to integer regs */
+#define XF2 -112(sp)
+#define XF3 -104(sp)
+#define XF4 -96(sp)
+#define XF5 -88(sp)
+#define XF6 -80(sp)
+#define XF7 -72(sp)
+#define XF8 -64(sp)
+#define XF9 -56(sp)
+#define XF10 -48(sp)
+#define XF11 -40(sp)
+#define XF12 -32(sp)
+#define XF13 -24(sp)
+#define XF14 -16(sp)
+#define XF15 -8(sp)
+
+#define mnop proberi (sp),3,zero /* memory NOP */
+
+
+
+
+/* ====================================================================== */
+/* assembler formalities */
+/* ====================================================================== */
+
+#ifdef __LP64__
+ .level 2.0W
+#else
+ .level 2.0
+#endif
+ .space $TEXT$
+ .subspa $CODE$
+ .align 16
+
+/* ====================================================================== */
+/* here to compute 64-bit x 512-bit product + 512-bit addend */
+/* ====================================================================== */
+
+multacc512
+ .PROC
+ .CALLINFO
+ .ENTRY
+ fldd 0(pM),M ; multiplier double word
+ ldo ST_SZ(sp),sp ; push stack
+
+ fldd 0(pA),A0 ; multiplicand double word 0
+ std S1,SV1 ; save s1
+
+ fldd 16(pA),A2 ; multiplicand double word 2
+ std S3,SV3 ; save s3
+
+ fldd 32(pA),A4 ; multiplicand double word 4
+ std S5,SV5 ; save s5
+
+ fldd 48(pA),A6 ; multiplicand double word 6
+ std S7,SV7 ; save s7
+
+
+ std S0,SV0 ; save s0
+ fldd 8(pA),A1 ; multiplicand double word 1
+ xmpyu MR,A0L,P0 ; A0 cross 32-bit word products
+ xmpyu ML,A0R,P2
+
+ std S2,SV2 ; save s2
+ fldd 24(pA),A3 ; multiplicand double word 3
+ xmpyu MR,A2L,P4 ; A2 cross 32-bit word products
+ xmpyu ML,A2R,P6
+
+ std S4,SV4 ; save s4
+ fldd 40(pA),A5 ; multiplicand double word 5
+
+ std S6,SV6 ; save s6
+ fldd 56(pA),A7 ; multiplicand double word 7
+
+
+ fstd P0,XF0 ; MR * A0L
+ xmpyu MR,A0R,P0 ; A0 right 32-bit word product
+ xmpyu MR,A1L,P1 ; A1 cross 32-bit word product
+
+ fstd P2,XF2 ; ML * A0R
+ xmpyu ML,A0L,P2 ; A0 left 32-bit word product
+ xmpyu ML,A1R,P3 ; A1 cross 32-bit word product
+
+ fstd P4,XF4 ; MR * A2L
+ xmpyu MR,A2R,P4 ; A2 right 32-bit word product
+ xmpyu MR,A3L,P5 ; A3 cross 32-bit word product
+
+ fstd P6,XF6 ; ML * A2R
+ xmpyu ML,A2L,P6 ; A2 left 32-bit word product
+ xmpyu ML,A3R,P7 ; A3 cross 32-bit word product
+
+
+ ldd XF0,S0 ; MR * A0L
+ fstd P1,XF1 ; MR * A1L
+
+ ldd XF2,S2 ; ML * A0R
+ fstd P3,XF3 ; ML * A1R
+
+ ldd XF4,S4 ; MR * A2L
+ fstd P5,XF5 ; MR * A3L
+ xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products
+ xmpyu ML,A1L,P3
+
+ ldd XF6,S6 ; ML * A2R
+ fstd P7,XF7 ; ML * A3R
+ xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products
+ xmpyu ML,A3L,P7
+
+
+ fstd P0,XF0 ; MR * A0R
+ ldd XF1,S1 ; MR * A1L
+ nop
+ add S0,S2,T1 ; A0 cross product sum
+
+ fstd P2,XF2 ; ML * A0L
+ ldd XF3,S3 ; ML * A1R
+ add,dc zero,zero,S0 ; A0 cross product sum carry
+ depd,z T1,31,32,S2 ; A0 cross product sum << 32
+
+ fstd P4,XF4 ; MR * A2R
+ ldd XF5,S5 ; MR * A3L
+ shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32
+ add S4,S6,T3 ; A2 cross product sum
+
+ fstd P6,XF6 ; ML * A2L
+ ldd XF7,S7 ; ML * A3R
+ add,dc zero,zero,S4 ; A2 cross product sum carry
+ depd,z T3,31,32,S6 ; A2 cross product sum << 32
+
+
+ ldd XF0,S8 ; MR * A0R
+ fstd P1,XF1 ; MR * A1R
+ xmpyu MR,A4L,P0 ; A4 cross 32-bit word product
+ xmpyu MR,A5L,P1 ; A5 cross 32-bit word product
+
+ ldd XF2,S10 ; ML * A0L
+ fstd P3,XF3 ; ML * A1L
+ xmpyu ML,A4R,P2 ; A4 cross 32-bit word product
+ xmpyu ML,A5R,P3 ; A5 cross 32-bit word product
+
+ ldd XF4,S12 ; MR * A2R
+ fstd P5,XF5 ; MR * A3L
+ xmpyu MR,A6L,P4 ; A6 cross 32-bit word product
+ xmpyu MR,A7L,P5 ; A7 cross 32-bit word product
+
+ ldd XF6,S14 ; ML * A2L
+ fstd P7,XF7 ; ML * A3L
+ xmpyu ML,A6R,P6 ; A6 cross 32-bit word product
+ xmpyu ML,A7R,P7 ; A7 cross 32-bit word product
+
+
+ fstd P0,XF0 ; MR * A4L
+ ldd XF1,S9 ; MR * A1R
+ shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32
+ add S1,S3,T1 ; A1 cross product sum
+
+ fstd P2,XF2 ; ML * A4R
+ ldd XF3,S11 ; ML * A1L
+ add,dc zero,zero,S1 ; A1 cross product sum carry
+ depd,z T1,31,32,S3 ; A1 cross product sum << 32
+
+ fstd P4,XF4 ; MR * A6L
+ ldd XF5,S13 ; MR * A3R
+ shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32
+ add S5,S7,T3 ; A3 cross product sum
+
+ fstd P6,XF6 ; ML * A6R
+ ldd XF7,S15 ; ML * A3L
+ add,dc zero,zero,S5 ; A3 cross product sum carry
+ depd,z T3,31,32,S7 ; A3 cross product sum << 32
+
+
+ shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32
+ add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword
+
+ add,dc S0,S10,S10 ; M * A0 left doubleword
+ add S3,S9,S9 ; M * A1 right doubleword
+
+ add,dc S1,S11,S11 ; M * A1 left doubleword
+ add S6,S12,S12 ; M * A2 right doubleword
+
+
+ ldd 24(pR),S3 ; Addend word 3
+ fstd P1,XF1 ; MR * A5L
+ add,dc S4,S14,S14 ; M * A2 left doubleword
+ xmpyu MR,A5R,P1 ; A5 right 32-bit word product
+
+ ldd 8(pR),S1 ; Addend word 1
+ fstd P3,XF3 ; ML * A5R
+ add S7,S13,S13 ; M * A3 right doubleword
+ xmpyu ML,A5L,P3 ; A5 left 32-bit word product
+
+ ldd 0(pR),S7 ; Addend word 0
+ fstd P5,XF5 ; MR * A7L
+ add,dc S5,S15,S15 ; M * A3 left doubleword
+ xmpyu MR,A7R,P5 ; A7 right 32-bit word product
+
+ ldd 16(pR),S5 ; Addend word 2
+ fstd P7,XF7 ; ML * A7R
+ add S10,S9,S9 ; P1 doubleword
+ xmpyu ML,A7L,P7 ; A7 left 32-bit word products
+
+
+ ldd XF0,S0 ; MR * A4L
+ fstd P1,XF9 ; MR * A5R
+ add,dc S11,S12,S12 ; P2 doubleword
+ xmpyu MR,A4R,P0 ; A4 right 32-bit word product
+
+ ldd XF2,S2 ; ML * A4R
+ fstd P3,XF11 ; ML * A5L
+ add,dc S14,S13,S13 ; P3 doubleword
+ xmpyu ML,A4L,P2 ; A4 left 32-bit word product
+
+ ldd XF6,S6 ; ML * A6R
+ fstd P5,XF13 ; MR * A7R
+ add,dc zero,S15,T2 ; P4 partial doubleword
+ xmpyu MR,A6R,P4 ; A6 right 32-bit word product
+
+ ldd XF4,S4 ; MR * A6L
+ fstd P7,XF15 ; ML * A7L
+ add S7,S8,S8 ; R0 + P0, new R0 doubleword
+ xmpyu ML,A6L,P6 ; A6 left 32-bit word product
+
+
+ fstd P0,XF0 ; MR * A4R
+ ldd XF7,S7 ; ML * A7R
+ add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword
+
+ fstd P2,XF2 ; ML * A4L
+ ldd XF1,S1 ; MR * A5L
+ add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword
+
+ fstd P4,XF4 ; MR * A6R
+ ldd XF5,S5 ; MR * A7L
+ add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword
+
+ fstd P6,XF6 ; ML * A6L
+ ldd XF3,S3 ; ML * A5R
+ add,dc zero,T2,T2 ; c + partial P4
+ add S0,S2,T1 ; A4 cross product sum
+
+
+ std S8,0(pR) ; save R0
+ add,dc zero,zero,S0 ; A4 cross product sum carry
+ depd,z T1,31,32,S2 ; A4 cross product sum << 32
+
+ std S9,8(pR) ; save R1
+ shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32
+ add S4,S6,T3 ; A6 cross product sum
+
+ std S12,16(pR) ; save R2
+ add,dc zero,zero,S4 ; A6 cross product sum carry
+ depd,z T3,31,32,S6 ; A6 cross product sum << 32
+
+
+ std S13,24(pR) ; save R3
+ shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32
+ add S1,S3,T1 ; A5 cross product sum
+
+ ldd XF0,S8 ; MR * A4R
+ add,dc zero,zero,S1 ; A5 cross product sum carry
+ depd,z T1,31,32,S3 ; A5 cross product sum << 32
+
+ ldd XF2,S10 ; ML * A4L
+ ldd XF9,S9 ; MR * A5R
+ shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32
+ add S5,S7,T3 ; A7 cross product sum
+
+ ldd XF4,S12 ; MR * A6R
+ ldd XF11,S11 ; ML * A5L
+ add,dc zero,zero,S5 ; A7 cross product sum carry
+ depd,z T3,31,32,S7 ; A7 cross product sum << 32
+
+ ldd XF6,S14 ; ML * A6L
+ ldd XF13,S13 ; MR * A7R
+ shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32
+ add S2,S8,S8 ; M * A4 right doubleword
+
+
+ ldd XF15,S15 ; ML * A7L
+ add,dc S0,S10,S10 ; M * A4 left doubleword
+ add S3,S9,S9 ; M * A5 right doubleword
+
+ add,dc S1,S11,S11 ; M * A5 left doubleword
+ add S6,S12,S12 ; M * A6 right doubleword
+
+ ldd 32(pR),S0 ; Addend word 4
+ ldd 40(pR),S1 ; Addend word 5
+ add,dc S4,S14,S14 ; M * A6 left doubleword
+ add S7,S13,S13 ; M * A7 right doubleword
+
+ ldd 48(pR),S2 ; Addend word 6
+ ldd 56(pR),S3 ; Addend word 7
+ add,dc S5,S15,S15 ; M * A7 left doubleword
+ add S8,T2,S8 ; P4 doubleword
+
+ ldd 64(pR),S4 ; Addend word 8
+ ldd SV5,s5 ; restore s5
+ add,dc S10,S9,S9 ; P5 doubleword
+ add,dc S11,S12,S12 ; P6 doubleword
+
+
+ ldd SV6,s6 ; restore s6
+ ldd SV7,s7 ; restore s7
+ add,dc S14,S13,S13 ; P7 doubleword
+ add,dc zero,S15,S15 ; P8 doubleword
+
+ add S0,S8,S8 ; new R4 doubleword
+
+ ldd SV0,s0 ; restore s0
+ std S8,32(pR) ; save R4
+ add,dc S1,S9,S9 ; new R5 doubleword
+
+ ldd SV1,s1 ; restore s1
+ std S9,40(pR) ; save R5
+ add,dc S2,S12,S12 ; new R6 doubleword
+
+ ldd SV2,s2 ; restore s2
+ std S12,48(pR) ; save R6
+ add,dc S3,S13,S13 ; new R7 doubleword
+
+ ldd SV3,s3 ; restore s3
+ std S13,56(pR) ; save R7
+ add,dc S4,S15,S15 ; new R8 doubleword
+
+ ldd SV4,s4 ; restore s4
+ std S15,64(pR) ; save result[8]
+ add,dc zero,zero,v0 ; return carry from R8
+
+ CMPIB,*= 0,v0,$L0 ; if no overflow, exit
+ LDO 8(pR),pR
+
+$FINAL1 ; Final carry propagation
+ LDD 64(pR),v0
+ LDO 8(pR),pR
+ ADDI 1,v0,v0
+ CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry.
+ STD v0,56(pR)
+$L0
+ bv zero(rp) ; -> caller
+ ldo -ST_SZ(sp),sp ; pop stack
+
+/* ====================================================================== */
+/* end of module */
+/* ====================================================================== */
+
+
+ bve (rp)
+ .EXIT
+ nop
+ .PROCEND
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+ .EXPORT multacc512,ENTRY
+
+ .end
diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s
new file mode 100644
index 0000000000..c72de8a12b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hppa20.s
@@ -0,0 +1,904 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef __LP64__
+ .LEVEL 2.0W
+#else
+; .LEVEL 1.1
+; .ALLOW 2.0N
+ .LEVEL 2.0
+#endif
+ .SPACE $TEXT$,SORT=8
+ .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+; ***************************************************************
+;
+; maxpy_[little/big]
+;
+; ***************************************************************
+
+; There is no default -- you must specify one or the other.
+#define LITTLE_WORDIAN 1
+
+#ifdef LITTLE_WORDIAN
+#define EIGHT 8
+#define SIXTEEN 16
+#define THIRTY_TWO 32
+#define UN_EIGHT -8
+#define UN_SIXTEEN -16
+#define UN_TWENTY_FOUR -24
+#endif
+
+#ifdef BIG_WORDIAN
+#define EIGHT -8
+#define SIXTEEN -16
+#define THIRTY_TWO -32
+#define UN_EIGHT 8
+#define UN_SIXTEEN 16
+#define UN_TWENTY_FOUR 24
+#endif
+
+; This performs a multiple-precision integer version of "daxpy",
+; using the selected addressing direction. "Little-wordian" means that
+; the least significant word of a number is stored at the lowest address.
+; "Big-wordian" means that the most significant word is at the lowest
+; address. Either way, the incoming address of the vector is that
+; of the least significant word. That means that, for little-wordian
+; addressing, we move the address upward as we propagate carries
+; from the least significant word to the most significant. For
+; big-wordian we move the address downward.
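+;
+; For reference, a minimal little-wordian C sketch of the semantics (not
+; part of the build; parameter names follow the prototype given at the
+; end of this file, and a compiler-supported 128-bit intermediate stands
+; in for the floating-point multiply/carry scheme used below):
+;
+;   void maxpy_little(int length, const unsigned long long *scalaraddr,
+;                     const unsigned long long *multiplicand,
+;                     unsigned long long *result)
+;   {
+;       unsigned __int128 t = 0;
+;       int i;
+;       for (i = 0; i < length; i++) {
+;           t += (unsigned __int128)multiplicand[i] * *scalaraddr + result[i];
+;           result[i] = (unsigned long long)t;
+;           t >>= 64;
+;       }
+;       while (t != 0) {        // follow the carry chain as far as needed
+;           t += result[i];
+;           result[i++] = (unsigned long long)t;
+;           t >>= 64;
+;       }
+;   }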
+
+; We use the following registers:
+;
+; r2 return PC, of course
+; r26 = arg1 = length
+; r25 = arg2 = address of scalar
+; r24 = arg3 = multiplicand vector
+; r23 = arg4 = result vector
+;
+; fr9 = scalar loaded once only from r25
+
+; The cycle counts shown in the bodies below are simply the result of a
+; scheduling by hand. The actual PCX-U hardware does it differently.
+; The intention is that the overall speed is the same.
+
+; The pipeline startup and shutdown code is constructed in the usual way,
+; by taking the loop bodies and removing unnecessary instructions.
+; We have left the comments describing cycle numbers in the code.
+; These are intended for reference when comparing with the main loop,
+; and have no particular relationship to actual cycle numbers.
+
+#ifdef LITTLE_WORDIAN
+maxpy_little
+#else
+maxpy_big
+#endif
+ .PROC
+ .CALLINFO FRAME=120,ENTRY_GR=4
+ .ENTRY
+ STW,MA %r3,128(%sp)
+ STW %r4,-124(%sp)
+
+ ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately.
+ FLDD 0(%r25),%fr9 ; fr9 = scalar
+
+; First startup
+
+ FLDD 0(%r24),%fr24 ; Cycle 1
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ FSTD %fr24,-96(%sp)
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ FSTD %fr25,-80(%sp)
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+
+; Second startup
+
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ FSTD %fr30,-56(%sp)
+ FLDD 0(%r24),%fr24
+
+ FSTD %fr26,-88(%sp) ; Cycle 2
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ LDD -56(%sp),%r20
+ ADD %r21,%r3,%r3
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ LDD -104(%sp),%r31
+ ADD,DC %r0,%r0,%r20
+ SHRPD %r19,%r3,32,%r3
+
+ LDD -72(%sp),%r29 ; Cycle 9
+ SHRPD %r20,%r19,32,%r20
+ ADD %r21,%r1,%r1
+
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ ADD,DC %r3,%r4,%r4
+ FSTD %fr24,-96(%sp)
+
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ ADD,DC %r0,%r20,%r20
+ LDD 0(%r23),%r3
+ FSTD %fr25,-80(%sp)
+
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ ADD %r0,%r0,%r0 ; clear the carry bit
+ ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12
+ FSTD %fr27,-48(%sp)
+; MFCTL %cr16,%r21 ; for timing
+; STD %r21,-112(%sp)
+
+; Here is the loop.
+
+$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ ADD,DC %r29,%r4,%r4
+ FSTD %fr30,-56(%sp)
+ FLDD 0(%r24),%fr24
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ ADD %r3,%r1,%r1
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ ADD,DC %r21,%r4,%r28
+ FSTD %fr29,-72(%sp)
+ LDD -96(%sp),%r3
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ ADD,DC %r20,%r31,%r22
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ ADD %r21,%r3,%r3
+ LDD -56(%sp),%r20
+ STD %r1,UN_SIXTEEN(%r23)
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ SHRPD %r3,%r0,32,%r21
+ LDD -88(%sp),%r4
+ LDD -48(%sp),%r1
+
+ ADD,DC %r0,%r0,%r20 ; Cycle 8
+ SHRPD %r19,%r3,32,%r3
+ FLDD EIGHT(%r24),%fr28
+ LDD -104(%sp),%r31
+
+ SHRPD %r20,%r19,32,%r20 ; Cycle 9
+ ADD %r21,%r1,%r1
+ STD %r28,UN_EIGHT(%r23)
+ LDD -72(%sp),%r29
+
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ ADD,DC %r3,%r4,%r4
+ FSTD %fr24,-96(%sp)
+
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr25,-80(%sp)
+ LDD 0(%r23),%r3
+
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ ADD %r22,%r1,%r1
+ ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12
+ FSTD %fr27,-48(%sp)
+
+$ENDLOOP
+
+; Shutdown code, first stage.
+
+; MFCTL %cr16,%r21 ; for timing
+; STD %r21,UN_SIXTEEN(%r23)
+; LDD -112(%sp),%r21
+; STD %r21,UN_EIGHT(%r23)
+
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ ADD,DC %r29,%r4,%r4
+ CMPIB,= 0,%r26,$ONEMORE
+ FSTD %fr30,-56(%sp)
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ ADD %r3,%r1,%r1 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+ FSTD %fr29,-72(%sp)
+ STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9
+ LDD -96(%sp),%r3
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+ STD %r1,UN_SIXTEEN(%r23)
+$JOIN4
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ ADD %r21,%r3,%r3 ; Cycle 6
+ LDD -56(%sp),%r20
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ SHRPD %r3,%r0,32,%r21
+ LDD -88(%sp),%r4
+ LDD -48(%sp),%r1
+
+ ADD,DC %r0,%r0,%r20 ; Cycle 8
+ SHRPD %r19,%r3,32,%r3
+ LDD -104(%sp),%r31
+
+ SHRPD %r20,%r19,32,%r20 ; Cycle 9
+ ADD %r21,%r1,%r1
+ LDD -72(%sp),%r29
+
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+
+ ADD,DC %r0,%r20,%r20 ; Cycle 11
+ LDD 0(%r23),%r3
+
+ ADD %r22,%r1,%r1 ; Cycle 13
+
+; Shutdown code, second stage.
+
+ ADD,DC %r29,%r4,%r4 ; Cycle 1
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+
+ STD %r1,UN_SIXTEEN(%r23); Cycle 6
+
+ STD %r28,UN_EIGHT(%r23) ; Cycle 9
+
+ LDD 0(%r23),%r3 ; Cycle 11
+
+; Shutdown code, third stage.
+
+ LDO SIXTEEN(%r23),%r23
+ ADD %r3,%r22,%r1
+$JOIN1 ADD,DC %r0,%r0,%r21
+ CMPIB,*= 0,%r21,$L0 ; if no overflow, exit
+ STD %r1,UN_SIXTEEN(%r23)
+
+; Final carry propagation
+
+$FINAL1 LDO EIGHT(%r23),%r23
+ LDD UN_SIXTEEN(%r23),%r21
+ ADDI 1,%r21,%r21
+ CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry.
+ STD %r21,UN_SIXTEEN(%r23)
+ B $L0
+ NOP
+
+; Here is the code that handles the difficult cases N=1, N=2, and N=3.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$N_IS_SMALL
+ CMPIB,= 0,%r26,$N_IS_ONE
+ FSTD %fr24,-96(%sp) ; Cycle 10
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ FSTD %fr25,-80(%sp)
+ FSTD %fr31,-64(%sp) ; Cycle 12
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ CMPIB,= 2,%r26,$N_IS_THREE
+ FSTD %fr30,-56(%sp)
+
+; N = 2
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ FSTD %fr28,-104(%sp) ; Cycle 3
+ LDD -96(%sp),%r3 ; Cycle 4
+ FSTD %fr29,-72(%sp)
+ B $JOIN4
+ ADD %r0,%r0,%r22
+
+$N_IS_THREE
+ FLDD SIXTEEN(%r24),%fr24
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+ B $JOIN3
+ ADD %r0,%r0,%r22
+
+$N_IS_ONE
+ FSTD %fr25,-80(%sp)
+ FSTD %fr27,-48(%sp)
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ B $JOIN5
+ ADD %r0,%r0,%r22
+
+; We came out of the unrolled loop with wrong parity. Do one more
+; single cycle. This is quite tricky, because of the way the
+; carry chains and SHRPD chains have been chopped up.
+
+$ONEMORE
+
+ FLDD 0(%r24),%fr24
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+ ADD %r3,%r1,%r1
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ ADD,DC %r21,%r4,%r28
+ STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ ADD,DC %r20,%r31,%r22
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ STD %r1,UN_SIXTEEN(%r23); Cycle 6
+$JOIN3
+ XMPYU %fr9L,%fr24R,%fr24
+ LDD -56(%sp),%r20
+ ADD %r21,%r3,%r3
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+
+ LDD -104(%sp),%r31 ; Cycle 8
+ ADD,DC %r0,%r0,%r20
+ SHRPD %r19,%r3,32,%r3
+
+ LDD -72(%sp),%r29 ; Cycle 9
+ SHRPD %r20,%r19,32,%r20
+ ADD %r21,%r1,%r1
+
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+ FSTD %fr24,-96(%sp)
+
+ ADD,DC %r0,%r20,%r20 ; Cycle 11
+ LDD 0(%r23),%r3
+ FSTD %fr25,-80(%sp)
+
+ ADD %r22,%r1,%r1 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+
+; Shutdown code, stage 1-1/2.
+
+ ADD,DC %r29,%r4,%r4 ; Cycle 1
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+ STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+ STD %r1,UN_SIXTEEN(%r23)
+$JOIN5
+ LDD -96(%sp),%r3 ; moved from cycle 4
+ LDD -80(%sp),%r21
+ ADD %r21,%r3,%r3 ; Cycle 6
+ ADD,DC %r0,%r0,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+ SHRPD %r19,%r3,32,%r3 ; Cycle 8
+ ADD %r21,%r1,%r1 ; Cycle 9
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+ LDD 0(%r23),%r3 ; Cycle 11
+ ADD %r22,%r1,%r1 ; Cycle 13
+
+; Shutdown code, stage 2-1/2.
+
+ ADD,DC %r0,%r4,%r4 ; Cycle 1
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+ STD %r1,UN_SIXTEEN(%r23)
+ ADD,DC %r21,%r4,%r1
+ B $JOIN1
+ LDO EIGHT(%r23),%r23
+
+; exit
+
+$L0
+ LDW -124(%sp),%r4
+ BVE (%r2)
+ .EXIT
+ LDW,MB -128(%sp),%r3
+
+ .PROCEND
+
+; ***************************************************************
+;
+; add_diag_[little/big]
+;
+; ***************************************************************
+
+; The arguments are as follows:
+; r2 return PC, of course
+; r26 = arg1 = length
+; r25 = arg2 = vector to square
+; r24 = arg3 = result vector
+
+#ifdef LITTLE_WORDIAN
+add_diag_little
+#else
+add_diag_big
+#endif
+ .PROC
+ .CALLINFO FRAME=120,ENTRY_GR=4
+ .ENTRY
+ STW,MA %r3,128(%sp)
+ STW %r4,-124(%sp)
+
+ ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately.
+ NOP
+
+; Startup code
+
+ FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body)
+ XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
+ XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr30
+ LDO SIXTEEN(%r25),%r25 ; Cycle 6
+ FSTD %fr29,-88(%sp)
+ FSTD %fr27,-72(%sp) ; Cycle 7
+ CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body)
+ FSTD %fr30,-96(%sp)
+ FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2
+ LDD -88(%sp),%r22 ; Cycle 3
+ LDD -72(%sp),%r31 ; Cycle 4
+ XMPYU %fr7R,%fr7R,%fr28
+ XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr31
+ LDD -96(%sp),%r20 ; Cycle 6
+ FSTD %fr28,-80(%sp)
+ ADD %r0,%r0,%r0 ; clear the carry bit
+ ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7
+ FSTD %fr24,-64(%sp)
+
+; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body".
+
+$DIAGLOOP
+ SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
+ LDO SIXTEEN(%r25),%r25
+ LDD 0(%r24),%r1
+ FSTD %fr31,-104(%sp)
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD,DC %r22,%r3,%r3
+ FLDD UN_SIXTEEN(%r25),%fr7
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ ADD %r1,%r3,%r3
+ XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
+ LDD -80(%sp),%r21
+ STD %r3,0(%r24)
+ XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr30
+ LDD -64(%sp),%r29
+ LDD EIGHT(%r24),%r1
+ ADD,DC %r4,%r20,%r20 ; Cycle 6
+ LDD -104(%sp),%r19
+ FSTD %fr29,-88(%sp)
+ ADD %r20,%r1,%r1 ; Cycle 7
+ FSTD %fr27,-72(%sp)
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ LDD UN_SIXTEEN(%r24),%r28
+ FSTD %fr30,-96(%sp)
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD,DC %r21,%r4,%r4
+ FLDD UN_EIGHT(%r25),%fr7
+ STD %r1,UN_TWENTY_FOUR(%r24)
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ ADD %r28,%r4,%r4
+ XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4
+ LDD -88(%sp),%r22
+ STD %r4,UN_SIXTEEN(%r24)
+ XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr31
+ LDD -72(%sp),%r31
+ LDD UN_EIGHT(%r24),%r28
+ ADD,DC %r3,%r19,%r19 ; Cycle 6
+ LDD -96(%sp),%r20
+ FSTD %fr28,-80(%sp)
+ ADD %r19,%r28,%r28 ; Cycle 7
+ FSTD %fr24,-64(%sp)
+ ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8
+ STD %r28,UN_EIGHT(%r24)
+
+$ENDDIAGLOOP
+
+ ADD,DC %r0,%r22,%r22
+ CMPIB,= 0,%r26,$ONEMOREDIAG
+ SHRPD %r31,%r0,31,%r3
+
+; Shutdown code, first stage.
+
+ FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ LDD -80(%sp),%r21
+ ADD %r3,%r28,%r3
+ LDD -64(%sp),%r29 ; Cycle 4
+ STD %r3,0(%r24)
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ LDO SIXTEEN(%r25),%r25 ; Cycle 6
+ LDD -104(%sp),%r19
+ ADD,DC %r4,%r20,%r20
+ ADD %r20,%r1,%r1 ; Cycle 7
+ ADD,DC %r0,%r21,%r21 ; Cycle 8
+ STD %r1,EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ LDD UN_SIXTEEN(%r24),%r1
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD %r4,%r21,%r4
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ ADD %r4,%r1,%r4
+ STD %r4,UN_SIXTEEN(%r24); Cycle 4
+ LDD UN_EIGHT(%r24),%r28 ; Cycle 5
+ ADD,DC %r3,%r19,%r19 ; Cycle 6
+ ADD %r19,%r28,%r28 ; Cycle 7
+ ADD,DC %r0,%r0,%r22 ; Cycle 8
+ CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit
+ STD %r28,UN_EIGHT(%r24)
+
+; Final carry propagation
+
+$FDIAG2
+ LDO EIGHT(%r24),%r24
+ LDD UN_EIGHT(%r24),%r26
+ ADDI 1,%r26,%r26
+ CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry.
+ STD %r26,UN_EIGHT(%r24)
+
+ B $Z0
+ NOP
+
+; Here is the code that handles the difficult case N=1.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$DIAG_N_IS_ONE
+
+ LDD -88(%sp),%r22
+ LDD -72(%sp),%r31
+ B $JOINDIAG
+ LDD -96(%sp),%r20
+
+; We came out of the unrolled loop with wrong parity. Do one more
+; single cycle. This is the "alternate body". It will, of course,
+; give us opposite registers from the other case, so we need
+; completely different shutdown code.
+
+$ONEMOREDIAG
+ FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ FLDD 0(%r25),%fr7 ; Cycle 2
+ SHRPD %r0,%r31,31,%r4
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ LDD -80(%sp),%r21
+ ADD %r3,%r28,%r3
+ LDD -64(%sp),%r29 ; Cycle 4
+ STD %r3,0(%r24)
+ XMPYU %fr7R,%fr7R,%fr29
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ XMPYU %fr7L,%fr7R,%fr27
+ XMPYU %fr7L,%fr7L,%fr30
+ LDD -104(%sp),%r19 ; Cycle 6
+ FSTD %fr29,-88(%sp)
+ ADD,DC %r4,%r20,%r20
+ FSTD %fr27,-72(%sp) ; Cycle 7
+ ADD %r20,%r1,%r1
+ ADD,DC %r0,%r21,%r21 ; Cycle 8
+ STD %r1,EIGHT(%r24)
+
+; Shutdown code, first stage.
+
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ FSTD %fr30,-96(%sp)
+ LDD UN_SIXTEEN(%r24),%r1
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD %r4,%r21,%r4
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ LDD -88(%sp),%r22
+ ADD %r4,%r1,%r4
+ LDD -72(%sp),%r31 ; Cycle 4
+ STD %r4,UN_SIXTEEN(%r24)
+ LDD UN_EIGHT(%r24),%r28 ; Cycle 5
+ LDD -96(%sp),%r20 ; Cycle 6
+ ADD,DC %r3,%r19,%r19
+ ADD %r19,%r28,%r28 ; Cycle 7
+ ADD,DC %r0,%r22,%r22 ; Cycle 8
+ STD %r28,UN_EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+$JOINDIAG
+ SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ ADD %r3,%r28,%r3
+ STD %r3,0(%r24) ; Cycle 4
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ ADD,DC %r4,%r20,%r20
+ ADD %r20,%r1,%r1 ; Cycle 7
+ ADD,DC %r0,%r0,%r21 ; Cycle 8
+ CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit
+ STD %r1,EIGHT(%r24)
+
+; Final carry propagation
+
+$FDIAG1
+ LDO EIGHT(%r24),%r24
+ LDD EIGHT(%r24),%r26
+ ADDI 1,%r26,%r26
+ CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry.
+ STD %r26,EIGHT(%r24)
+
+$Z0
+ LDW -124(%sp),%r4
+ BVE (%r2)
+ .EXIT
+ LDW,MB -128(%sp),%r3
+ .PROCEND
+; .ALLOW
+
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+#ifdef LITTLE_WORDIAN
+#ifdef __GNUC__
+; GNU-as (as of 2.19) does not support LONG_RETURN
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+#else
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+#else
+ .EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+ .END
+
+
+; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
+;
+; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
+; performs a 64-bit x any-size multiply, and adds the
+; result to an area of memory. That is, it performs
+; something like
+;
+; A B C D
+; * Z
+; __________
+; P Q R S T
+;
+; and then adds the "PQRST" vector into an area of memory,
+; handling all carries.
+;
+; Digression on nomenclature and endian-ness:
+;
+; Each of the capital letters in the above represents a 64-bit
+; quantity. That is, you could think of the discussion as
+; being in terms of radix-16-quintillion arithmetic. The data
+; type being manipulated is "unsigned long long int". This
+; requires the 64-bit extension of the HP-UX C compiler,
+; available at release 10. You need these compiler flags to
+; enable these extensions:
+;
+; -Aa +e +DA2.0 +DS2.0
+;
+; (The first specifies ANSI C, the second enables the
+; extensions, which are beyond ANSI C, and the third and
+; fourth tell the compiler to use whatever features of the
+; PA2.0 architecture it wishes, in order to made the code more
+; efficient. Since the presence of the assembly code will
+; make the program unable to run on anything less than PA2.0,
+; you might as well gain the performance enhancements in the C
+; code as well.)
+;
+; Questions of "endian-ness" often come up, usually in the
+; context of byte ordering in a word. These routines have a
+; similar issue, which could be called "wordian-ness".
+; Independent of byte ordering (PA is always big-endian), one
+; can make two choices when representing extremely large
+; numbers as arrays of 64-bit doublewords in memory.
+;
+; "Little-wordian" layout means that the least significant
+; word of a number is stored at the lowest address.
+;
+; MSW LSW
+; | |
+; V V
+;
+; A B C D E
+;
+; ^ ^ ^
+; | | |____ address 0
+; | |
+; | |_______address 8
+; |
+; address 32
+;
+; "Big-wordian" means that the most significant word is at the
+; lowest address.
+;
+; MSW LSW
+; | |
+; V V
+;
+; A B C D E
+;
+; ^ ^ ^
+; | | |____ address 32
+; | |
+; | |_______address 24
+; |
+; address 0
+;
+; When you compile the file, you must specify one or the other, with
+; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
+;
+; Incidentally, you assemble this file as part of your
+; project with the same C compiler as the rest of the program.
+; My "makefile" for a superprecision arithmetic package has
+; the following stuff:
+;
+; # definitions:
+; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1
+; CFLAGS = +O3
+; LDFLAGS = -L /usr/lib -Wl,-aarchive
+;
+; # general build rule for ".s" files:
+; .s.o:
+; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN
+;
+; # Now any bind step that calls for pa20.o will assemble pa20.s
+;
+; End of digression, back to arithmetic:
+;
+; The way we multiply two huge numbers is, of course, to multiply
+; the "ABCD" vector by each of the "WXYZ" doublewords, adding
+; the result vectors with increasing offsets, the way we learned
+; in school, back before we all used calculators:
+;
+; A B C D
+; * W X Y Z
+; __________
+; P Q R S T
+; E F G H I
+; M N O P Q
+; + R S T U V
+; _______________
+; F I N A L S U M
+;
+; So we call maxpy_PA20_big (in my case; my package is
+; big-wordian) repeatedly, giving the W, X, Y, and Z arguments
+; in turn as the "scalar", and giving the "ABCD" vector each
+; time. We direct it to add its result into an area of memory
+; that we have cleared at the start. We skew the exact
+; location into that area with each call.
+;
+; The prototype for the function is
+;
+; extern void maxpy_PA20_big(
+; int length, /* Number of doublewords in the multiplicand vector. */
+; const long long int *scalaraddr, /* Address to fetch the scalar. */
+; const long long int *multiplicand, /* The multiplicand vector. */
+; long long int *result); /* Where to accumulate the result. */
+;
+; (You should place a copy of this prototype in an include file
+; or in your C file.)
+;
+; Now, IN ALL CASES, the given address for the multiplicand or
+; the result is that of the LEAST SIGNIFICANT DOUBLEWORD.
+; That word is, of course, the word at which the routine
+; starts processing. "maxpy_PA20_little" then increases the
+; addresses as it computes. "maxpy_PA20_big" decreases them.
+;
+; In our example above, "length" would be 4 in each case.
+; "multiplicand" would be the "ABCD" vector. Specifically,
+; the address of the element "D". "scalaraddr" would be the
+; address of "W", "X", "Y", or "Z" on the four calls that we
+; would make. (The order doesn't matter, of course.)
+; "result" would be the appropriate address in the result
+; area. When multiplying by "Z", that would be the least
+; significant word. When multiplying by "Y", it would be the
+; next higher word (8 bytes higher if little-wordian; 8 bytes
+; lower if big-wordian), and so on. The size of the result
+; area must be the sum of the sizes of the multiplicand
+; and multiplier vectors, and must be initialized to zero
+; before we start.
+;
+; Whenever the routine adds its partial product into the result
+; vector, it follows carry chains as far as they need to go.
+;
+; Here is the super-precision multiply routine that I use for
+; my package. The package is big-wordian. I have taken out
+; handling of exponents (it's a floating point package):
+;
+; static void mul_PA20(
+; int size,
+; const long long int *arg1,
+; const long long int *arg2,
+; long long int *result)
+; {
+; int i;
+;
+; for (i=0 ; i<2*size ; i++) result[i] = 0ULL;
+;
+; for (i=0 ; i<size ; i++) {
+; maxpy_PA20_big(size, &arg2[i], &arg1[size-1], &result[size+i]);
+; }
+; }
diff --git a/security/nss/lib/freebl/mpi/logtab.h b/security/nss/lib/freebl/mpi/logtab.h
new file mode 100644
index 0000000000..24cb13c5b7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/logtab.h
@@ -0,0 +1,28 @@
+/*
+ * logtab.h
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
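+/* Each entry is 1 / log2(v), i.e. log base v of 2, for v = 0..64; the
+ * v = 0 and v = 1 slots are zero placeholders. Multiplying a bit count
+ * by s_logv_2[v] estimates how many radix-v digits the value needs.
+ * As a sketch, the table can be regenerated with:
+ *
+ *   for (int v = 2; v <= 64; v++)
+ *       printf("%.9ff,\n", (float)(1.0 / log2((double)v)));
+ */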
+const float s_logv_2[] = {
+ 0.000000000f, 0.000000000f, 1.000000000f, 0.630929754f, /* 0 1 2 3 */
+ 0.500000000f, 0.430676558f, 0.386852807f, 0.356207187f, /* 4 5 6 7 */
+ 0.333333333f, 0.315464877f, 0.301029996f, 0.289064826f, /* 8 9 10 11 */
+ 0.278942946f, 0.270238154f, 0.262649535f, 0.255958025f, /* 12 13 14 15 */
+ 0.250000000f, 0.244650542f, 0.239812467f, 0.235408913f, /* 16 17 18 19 */
+ 0.231378213f, 0.227670249f, 0.224243824f, 0.221064729f, /* 20 21 22 23 */
+ 0.218104292f, 0.215338279f, 0.212746054f, 0.210309918f, /* 24 25 26 27 */
+ 0.208014598f, 0.205846832f, 0.203795047f, 0.201849087f, /* 28 29 30 31 */
+ 0.200000000f, 0.198239863f, 0.196561632f, 0.194959022f, /* 32 33 34 35 */
+ 0.193426404f, 0.191958720f, 0.190551412f, 0.189200360f, /* 36 37 38 39 */
+ 0.187901825f, 0.186652411f, 0.185449023f, 0.184288833f, /* 40 41 42 43 */
+ 0.183169251f, 0.182087900f, 0.181042597f, 0.180031327f, /* 44 45 46 47 */
+ 0.179052232f, 0.178103594f, 0.177183820f, 0.176291434f, /* 48 49 50 51 */
+ 0.175425064f, 0.174583430f, 0.173765343f, 0.172969690f, /* 52 53 54 55 */
+ 0.172195434f, 0.171441601f, 0.170707280f, 0.169991616f, /* 56 57 58 59 */
+ 0.169293808f, 0.168613099f, 0.167948779f, 0.167300179f, /* 60 61 62 63 */
+ 0.166666667f
+};
diff --git a/security/nss/lib/freebl/mpi/montmulf.c b/security/nss/lib/freebl/mpi/montmulf.c
new file mode 100644
index 0000000000..ce8fbc31d2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.c
@@ -0,0 +1,286 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef SOLARIS
+#define RF_INLINE_MACROS 1
+#endif
+
+static const double TwoTo16 = 65536.0;
+static const double TwoToMinus16 = 1.0 / 65536.0;
+static const double Zero = 0.0;
+static const double TwoTo32 = 65536.0 * 65536.0;
+static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
+
+#ifdef RF_INLINE_MACROS
+
+double upper32(double);
+double lower32(double, double);
+double mod(double, double, double);
+
+void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+ const double * /* 2^16*/,
+ const double * /* 0 */,
+ double * /*result16*/,
+ double * /* result32 */,
+ float * /*source - should be unsigned int* converted to float* */);
+
+#else
+#ifdef MP_USE_FLOOR
+#include <math.h>
+#else
+#define floor(d) ((double)((unsigned long long)(d)))
+#endif
+
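+/* upper32(x): floor(x / 2^32), i.e. the high 32 bits of a nonnegative
+ * value held exactly in a double. */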
+static double
+upper32(double x)
+{
+ return floor(x * TwoToMinus32);
+}
+
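+/* lower32(x, y): x mod 2^32. The second argument is unused in this C
+ * fallback; it exists to match the inline-assembly version, which takes
+ * an explicit Zero. */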
+static double
+lower32(double x, double y)
+{
+ return x - TwoTo32 * floor(x * TwoToMinus32);
+}
+
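+/* mod(x, 1/m, m): x mod m, using a precomputed double reciprocal. */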
+static double
+mod(double x, double oneoverm, double m)
+{
+ return x - m * floor(x * oneoverm);
+}
+
+#endif
+
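+/* Renormalize dt[2*from .. 2*tlen-1]: split each element into its low
+ * 32 bits plus a carry added to the element two positions later, i.e.
+ * two interleaved carry chains over the even and the odd indices. */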
+static void
+cleanup(double *dt, int from, int tlen)
+{
+ int i;
+ double tmp, tmp1, x, x1;
+
+ tmp = tmp1 = Zero;
+ /* original code **
+ for(i=2*from;i<2*tlen-2;i++)
+ {
+ x=dt[i];
+ dt[i]=lower32(x,Zero)+tmp1;
+ tmp1=tmp;
+ tmp=upper32(x);
+ }
+ dt[tlen-2]+=tmp1;
+ dt[tlen-1]+=tmp;
+ **end original code ***/
+ /* new code ***/
+ for (i = 2 * from; i < 2 * tlen; i += 2) {
+ x = dt[i];
+ x1 = dt[i + 1];
+ dt[i] = lower32(x, Zero) + tmp;
+ dt[i + 1] = lower32(x1, Zero) + tmp1;
+ tmp = upper32(x);
+ tmp1 = upper32(x1);
+ }
+ /** end new code **/
+}
+
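+/* Collapse the 16-bit-halves double representation back into 32-bit
+ * words: each (low, high) pair is recombined as low + (high << 16),
+ * carrying the excess bits accumulated in each half into higher words. */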
+void
+conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+{
+ int i;
+ long long t, t1, a, b, c, d;
+
+ t1 = 0;
+ a = (long long)d16[0];
+ b = (long long)d16[1];
+ for (i = 0; i < ilen - 1; i++) {
+ c = (long long)d16[2 * i + 2];
+ t1 += (unsigned int)a;
+ t = (a >> 32);
+ d = (long long)d16[2 * i + 3];
+ t1 += (b & 0xffff) << 16;
+ t += (b >> 16) + (t1 >> 32);
+ i32[i] = (unsigned int)t1;
+ t1 = t;
+ a = c;
+ b = d;
+ }
+ t1 += (unsigned int)a;
+ t = (a >> 32);
+ t1 += (b & 0xffff) << 16;
+ i32[i] = (unsigned int)t1;
+}
+
+void
+conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+{
+ int i;
+
+#pragma pipeloop(0)
+ for (i = 0; i < len; i++)
+ d32[i] = (double)(i32[i]);
+}
+
+void
+conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+{
+ int i;
+ unsigned int a;
+
+#pragma pipeloop(0)
+ for (i = 0; i < len; i++) {
+ a = i32[i];
+ d16[2 * i] = (double)(a & 0xffff);
+ d16[2 * i + 1] = (double)(a >> 16);
+ }
+}
+
+void
+conv_i32_to_d32_and_d16(double *d32, double *d16,
+ unsigned int *i32, int len)
+{
+ int i = 0;
+ unsigned int a;
+
+#pragma pipeloop(0)
+#ifdef RF_INLINE_MACROS
+ for (; i < len - 3; i += 4) {
+ i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+ &(d16[2 * i]), &(d32[i]), (float *)(&(i32[i])));
+ }
+#endif
+ for (; i < len; i++) {
+ a = i32[i];
+ d32[i] = (double)(i32[i]);
+ d16[2 * i] = (double)(a & 0xffff);
+ d16[2 * i + 1] = (double)(a >> 16);
+ }
+}
+
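+/* Final Montgomery adjustment: i32 holds a (len+1)-word value with the
+ * carry in i32[len]; if that value is >= the modulus in nint, subtract
+ * the modulus once so the result is fully reduced. */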
+void
+adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+{
+ long long acc;
+ int i;
+
+ if (i32[len] > 0)
+ i = -1;
+ else {
+ for (i = len - 1; i >= 0; i--) {
+ if (i32[i] != nint[i])
+ break;
+ }
+ }
+ if ((i < 0) || (i32[i] > nint[i])) {
+ acc = 0;
+ for (i = 0; i < len; i++) {
+ acc = acc + (unsigned long long)(i32[i]) - (unsigned long long)(nint[i]);
+ i32[i] = (unsigned int)acc;
+ acc = acc >> 32;
+ }
+ }
+}
+
+/*
+** the lengths of the input arrays should be at least the following:
+** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+** all of them should be different from one another
+**
+*/
+void
+mont_mulf_noconv(unsigned int *result,
+ double *dm1, double *dm2, double *dt,
+ double *dn, unsigned int *nint,
+ int nlen, double dn0)
+{
+ int i, j, jj;
+ int tmp;
+ double digit, m2j, nextm2j, a, b;
+ double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+
+ pdm1 = &(dm1[0]);
+ pdm2 = &(dm2[0]);
+ pdn = &(dn[0]);
+ pdm2[2 * nlen] = Zero;
+
+ if (nlen != 16) {
+ for (i = 0; i < 4 * nlen + 2; i++)
+ dt[i] = Zero;
+
+ a = dt[0] = pdm1[0] * pdm2[0];
+ digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+
+ pdtj = &(dt[0]);
+ for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
+ m2j = pdm2[j];
+ a = pdtj[0] + pdn[0] * digit;
+ b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
+ pdtj[1] = b;
+
+#pragma pipeloop(0)
+ for (i = 1; i < nlen; i++) {
+ pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
+ }
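+ /* Every 31st pass, renormalize the accumulators in dt[] so the
+ double-precision sums keep their low-order bits exact (the fully
+ unrolled nlen == 16 path below notes it cannot overflow and
+ skips this step). */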
+ if (jj == 30) {
+ cleanup(dt, j / 2 + 1, 2 * nlen + 1);
+ jj = 0;
+ }
+
+ digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+ }
+ } else {
+ a = dt[0] = pdm1[0] * pdm2[0];
+
+ dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
+ dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = dt[54] =
+ dt[53] = dt[52] = dt[51] = dt[50] = dt[49] = dt[48] =
+ dt[47] = dt[46] = dt[45] = dt[44] = dt[43] = dt[42] =
+ dt[41] = dt[40] = dt[39] = dt[38] = dt[37] = dt[36] =
+ dt[35] = dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
+ dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = dt[24] =
+ dt[23] = dt[22] = dt[21] = dt[20] = dt[19] = dt[18] =
+ dt[17] = dt[16] = dt[15] = dt[14] = dt[13] = dt[12] =
+ dt[11] = dt[10] = dt[9] = dt[8] = dt[7] = dt[6] =
+ dt[5] = dt[4] = dt[3] = dt[2] = dt[1] = Zero;
+
+ pdn_0 = pdn[0];
+ pdm1_0 = pdm1[0];
+
+ digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+ pdtj = &(dt[0]);
+
+ for (j = 0; j < 32; j++, pdtj++) {
+
+ m2j = pdm2[j];
+ a = pdtj[0] + pdn_0 * digit;
+ b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
+ pdtj[1] = b;
+
+ /**** this loop will be fully unrolled:
+ for(i=1;i<16;i++)
+ {
+ pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+ }
+ *************************************/
+ pdtj[2] += pdm1[1] * m2j + pdn[1] * digit;
+ pdtj[4] += pdm1[2] * m2j + pdn[2] * digit;
+ pdtj[6] += pdm1[3] * m2j + pdn[3] * digit;
+ pdtj[8] += pdm1[4] * m2j + pdn[4] * digit;
+ pdtj[10] += pdm1[5] * m2j + pdn[5] * digit;
+ pdtj[12] += pdm1[6] * m2j + pdn[6] * digit;
+ pdtj[14] += pdm1[7] * m2j + pdn[7] * digit;
+ pdtj[16] += pdm1[8] * m2j + pdn[8] * digit;
+ pdtj[18] += pdm1[9] * m2j + pdn[9] * digit;
+ pdtj[20] += pdm1[10] * m2j + pdn[10] * digit;
+ pdtj[22] += pdm1[11] * m2j + pdn[11] * digit;
+ pdtj[24] += pdm1[12] * m2j + pdn[12] * digit;
+ pdtj[26] += pdm1[13] * m2j + pdn[13] * digit;
+ pdtj[28] += pdm1[14] * m2j + pdn[14] * digit;
+ pdtj[30] += pdm1[15] * m2j + pdn[15] * digit;
+ /* no need for cleanup, cannot overflow */
+ digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+ }
+ }
+
+ conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1);
+
+ adjust_montf_result(result, nint, nlen);
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h
new file mode 100644
index 0000000000..69bed4acb1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* The functions that are to be called from outside of the .s file have the
+ * following interfaces and array size requirements:
+ */
+
+void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
+
+/* Converts an array of int's to an array of doubles, so that each double
+ * corresponds to an int. len is the number of items converted.
+ * Does not allocate the output array.
+ * The pointers d32 and i32 should point to arrays of size at least len
+ * (doubles and unsigned ints, respectively)
+ */
+
+void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
+
+/* Converts an array of int's to an array of doubles so that each element
+ * of the int array is converted to a pair of doubles, the first one
+ * corresponding to the lower (least significant) 16 bits of the int and
+ * the second one corresponding to the upper (most significant) 16 bits of
+ * the 32-bit int. len is the number of ints converted.
+ * Does not allocate the output array.
+ * The pointer d16 should point to an array of doubles of size at least
+ * 2*len and i32 should point to an array of ints of size at least len
+ */
+
+void conv_i32_to_d32_and_d16(double *d32, double *d16,
+ unsigned int *i32, int len);
+
+/* Performs the above two conversions together; this is much faster than
+ * doing both of them in succession.
+ */
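+
+/* For reference, the per-element C equivalent (echoed in the compiler
+ * listing in montmulf.s) simply combines the two conversions above:
+ *
+ *     a = i32[i];
+ *     d32[i] = (double)(i32[i]);
+ *     d16[2*i] = (double)(a & 0xffff);
+ *     d16[2*i+1] = (double)(a >> 16);
+ */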
+
+void mont_mulf_noconv(unsigned int *result,
+ double *dm1, double *dm2, double *dt,
+ double *dn, unsigned int *nint,
+ int nlen, double dn0);
+
+/* Performs the Montgomery multiplication of the numbers stored in the
+ * arrays pointed to by dm1 and dm2, writing the result to the array pointed
+ * to by result. It uses the array pointed to by dt as a temporary work area.
+ * nint should point to the modulus in the array-of-integers representation;
+ * dn should point to its array-of-doubles representation as obtained from
+ * the function call conv_i32_to_d32(dn, nint, nlen);
+ * nlen is the length of the array containing the modulus.
+ * The representation used for dm1 is the result of the function call
+ * conv_i32_to_d32(dm1, m1, nlen); the representation for dm2 is the
+ * result of the function call conv_i32_to_d16(dm2, m2, nlen).
+ * Note that m1 and m2 should both be of length nlen, so they should be
+ * padded with 0's if necessary before the conversion. The result is
+ * produced in the same form (int representation, padded with 0's).
+ * dn0 is the value of the 16 least significant bits of n0'.
+ * The function does not allocate memory for any of the arrays, so the
+ * pointers should point to arrays with the following minimal sizes:
+ * result - nlen+1
+ * dm1 - nlen
+ * dm2 - 2*nlen+1 (the +1 is necessary for technical reasons)
+ * dt - 4*nlen+2
+ * dn - nlen
+ * nint - nlen
+ * No two arrays should point to overlapping areas of memory.
+ */
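+
+/* A minimal usage sketch (illustrative only; NLEN is a hypothetical modulus
+ * length, and the names m1, m2 and the minimal sizes follow the comments
+ * above):
+ *
+ *     unsigned int m1[NLEN], m2[NLEN], nint[NLEN], result[NLEN + 1];
+ *     double dm1[NLEN], dm2[2 * NLEN + 1], dn[NLEN], dt[4 * NLEN + 2];
+ *     double dn0;  (the 16 least significant bits of n0', caller-supplied)
+ *
+ *     conv_i32_to_d32(dn, nint, NLEN);
+ *     conv_i32_to_d32(dm1, m1, NLEN);
+ *     conv_i32_to_d16(dm2, m2, NLEN);
+ *     mont_mulf_noconv(result, dm1, dm2, dt, dn, nint, NLEN, dn0);
+ */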
diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il
new file mode 100644
index 0000000000..4952d0fb82
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+!
+ .inline upper32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+
+ fdtox %f10,%f10
+ fitod %f10,%f0
+ .end
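+
+! For the nonnegative in-range values used here, this is equivalent to the
+! reference C version echoed in montmulf.s:
+!     return floor(x*TwoToMinus32);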
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+!
+ .inline lower32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f12
+
+ fdtox %f10,%f10
+ fmovs %f12,%f10
+ fxtod %f10,%f0
+ .end
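+
+! Equivalent to the reference C version echoed in montmulf.s (the inline
+! uses the second argument, which must be Zero, to clear the upper word):
+!     return x-TwoTo32*floor(x*TwoToMinus32);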
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+ .inline mod,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f2
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f6
+
+ fmuld %f2,%f4,%f4
+ fdtox %f4,%f4
+ fxtod %f4,%f4
+ fmuld %f4,%f6,%f4
+ fsubd %f2,%f4,%f0
+ .end
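+
+! Equivalent to the reference C version echoed in montmulf.s (fdtox/fxtod
+! truncate, which matches floor for the nonnegative quotients used here):
+!     return x-m*floor(x*oneoverm);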
+
+
+!
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */
+! float * /*source - should be unsigned int*
+! converted to float* */);
+!
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22
+
+ fmovd %f22,%f6
+ ld [%o5],%f7
+ fmovd %f22,%f10
+ ld [%o5+4],%f11
+ fmovd %f22,%f14
+ ld [%o5+8],%f15
+ fmovd %f22,%f18
+ ld [%o5+12],%f19
+ fxtod %f6,%f6
+ std %f6,[%o4]
+ fxtod %f10,%f10
+ std %f10,[%o4+8]
+ fxtod %f14,%f14
+ std %f14,[%o4+16]
+ fxtod %f18,%f18
+ std %f18,[%o4+24]
+ fmuld %f2,%f6,%f8
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8
+ std %f8,[%o3+8]
+ fxtod %f12,%f12
+ std %f12,[%o3+24]
+ fxtod %f16,%f16
+ std %f16,[%o3+40]
+ fxtod %f20,%f20
+ std %f20,[%o3+56]
+ fmuld %f8,%f4,%f8
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8
+ std %f8,[%o3]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48]
+ .end
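+
+! Rough C sketch of the effect (an interpretation of the code above, not
+! taken from the reference source): for k = 0..3, with
+! s = ((unsigned int *)source)[k],
+!     result32[k] = (double)s;
+!     result16[2*k+1] = (double)(s >> 16);
+!     result16[2*k] = (double)(s & 0xffff);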
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s
new file mode 100644
index 0000000000..69d2a3c51b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.s
@@ -0,0 +1,1938 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".data",#alloc,#write
+ .align 8
+TwoTo16: /* frequency 1.0 confidence 0.0 */
+ .word 1089470464
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+TwoToMinus16: /* frequency 1.0 confidence 0.0 */
+ .word 1055916032
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+Zero: /* frequency 1.0 confidence 0.0 */
+ .word 0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+TwoTo32: /* frequency 1.0 confidence 0.0 */
+ .word 1106247680
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+TwoToMinus32: /* frequency 1.0 confidence 0.0 */
+ .word 1039138816
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE cleanup
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global cleanup
+ cleanup: /* frequency 1.0 confidence 0.0 */
+! FILE montmulf.c
+
+! 1 !#define RF_INLINE_MACROS
+! 3 !static double TwoTo16=65536.0;
+! 4 !static double TwoToMinus16=1.0/65536.0;
+! 5 !static double Zero=0.0;
+! 6 !static double TwoTo32=65536.0*65536.0;
+! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0);
+! 9 !#ifdef RF_INLINE_MACROS
+! 11 !double upper32(double);
+! 12 !double lower32(double, double);
+! 13 !double mod(double, double, double);
+! 15 !#else
+! 17 !static double upper32(double x)
+! 18 !{
+! 19 ! return floor(x*TwoToMinus32);
+! 20 !}
+! 22 !static double lower32(double x, double y)
+! 23 !{
+! 24 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 25 !}
+! 27 !static double mod(double x, double oneoverm, double m)
+! 28 !{
+! 29 ! return x-m*floor(x*oneoverm);
+! 30 !}
+! 32 !#endif
+! 35 !void cleanup(double *dt, int from, int tlen)
+! 36 !{
+! 37 ! int i;
+! 38 ! double tmp,tmp1,x,x1;
+! 40 ! tmp=tmp1=Zero;
+
+/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2
+
+! 41 ! /* original code **
+! 42 ! for(i=2*from;i<2*tlen-2;i++)
+! 43 ! {
+! 44 ! x=dt[i];
+! 45 ! dt[i]=lower32(x,Zero)+tmp1;
+! 46 ! tmp1=tmp;
+! 47 ! tmp=upper32(x);
+! 48 ! }
+! 49 ! dt[tlen-2]+=tmp1;
+! 50 ! dt[tlen-1]+=tmp;
+! 51 ! **end original code ***/
+! 52 ! /* new code ***/
+! 53 ! for(i=2*from;i<2*tlen;i+=2)
+
+/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3
+/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0
+/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2
+/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4
+/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1
+/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4
+/* 0x001c 53 ( 3 4) */ cmp %g4,%g3
+/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56
+/* 0x0024 ( 4 5) */ fmovd %f0,%f2
+/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1
+/* 0x002c ( 4 5) */ sub %g3,1,%g3
+
+! 54 ! {
+! 55 ! x=dt[i];
+
+/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8
+ .L900000114: /* frequency 6.4 confidence 0.0 */
+/* 0x0034 ( 0 3) */ fdtox %f8,%f6
+
+! 56 ! x1=dt[i+1];
+
+/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10
+
+! 57 ! dt[i]=lower32(x,Zero)+tmp;
+! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 59 ! tmp=upper32(x);
+! 60 ! tmp1=upper32(x1);
+
+/* 0x003c 60 ( 0 1) */ add %g4,2,%g4
+/* 0x0040 ( 1 4) */ fdtox %f8,%f8
+/* 0x0044 ( 1 2) */ cmp %g4,%g3
+/* 0x0048 ( 5 6) */ fmovs %f0,%f6
+/* 0x004c ( 7 10) */ fxtod %f6,%f6
+/* 0x0050 ( 8 11) */ fdtox %f10,%f0
+/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2
+/* 0x0058 (10 11) */ std %f2,[%g1]
+/* 0x005c (12 15) */ ldd [%g2],%f2
+/* 0x0060 (14 15) */ fmovs %f2,%f0
+/* 0x0064 (16 19) */ fxtod %f0,%f6
+/* 0x0068 (17 20) */ fdtox %f10,%f0
+/* 0x006c (18 21) */ fitod %f8,%f2
+/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4
+/* 0x0074 (19 20) */ std %f4,[%g1+8]
+/* 0x0078 60 (19 20) */ add %g1,16,%g1
+/* 0x007c (20 23) */ fitod %f0,%f4
+/* 0x0080 (20 23) */ ldd [%g2],%f0
+/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86
+/* 0x0088 (21 24) */ ldd [%g1],%f8
+ .L77000116: /* frequency 1.0 confidence 0.0 */
+/* 0x008c ( 0 2) */ retl ! Result =
+/* 0x0090 ( 1 2) */ nop
+/* 0x0094 0 ( 0 0) */ .type cleanup,2
+/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_d16_to_i32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-136,%sp
+
+! 61 ! }
+! 62 ! /** end new code **/
+! 63 !}
+! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 67 !{
+! 68 !int i;
+! 69 !long long t, t1, a, b, c, d;
+! 71 ! t1=0;
+! 72 ! a=(long long)d16[0];
+
+/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0
+
+! 73 ! b=(long long)d16[1];
+! 74 ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008 74 ( 1 2) */ sub %i3,1,%g2
+/* 0x000c 67 ( 1 2) */ or %g0,%i0,%g5
+/* 0x0010 74 ( 2 3) */ cmp %g2,0
+/* 0x0014 71 ( 2 3) */ or %g0,0,%o4
+/* 0x0018 72 ( 3 6) */ fdtox %f0,%f0
+/* 0x001c ( 3 4) */ std %f0,[%sp+120]
+/* 0x0020 74 ( 3 4) */ or %g0,0,%o7
+/* 0x0024 67 ( 4 5) */ or %g0,%i3,%o0
+/* 0x0028 ( 4 5) */ sub %i3,2,%o2
+/* 0x002c 73 ( 5 8) */ ldd [%i1+8],%f0
+/* 0x0030 67 ( 5 6) */ sethi %hi(0xfc00),%o0
+/* 0x0034 ( 5 6) */ add %o2,1,%g3
+/* 0x0038 ( 6 7) */ add %o0,1023,%o1
+/* 0x003c ( 6 7) */ or %g0,%g5,%o5
+/* 0x0040 73 ( 7 10) */ fdtox %f0,%f0
+/* 0x0044 ( 7 8) */ std %f0,[%sp+112]
+/* 0x0048 72 (11 13) */ ldx [%sp+120],%g4
+/* 0x004c 73 (12 14) */ ldx [%sp+112],%g1
+/* 0x0050 74 (12 13) */ ble,pt %icc,.L900000214 ! tprob=0.56
+/* 0x0054 (12 13) */ sethi %hi(0xfc00),%g2
+/* 0x0058 67 (13 14) */ or %g0,-1,%g2
+/* 0x005c 74 (13 14) */ cmp %g3,3
+/* 0x0060 67 (14 15) */ srl %g2,0,%o3
+/* 0x0064 (14 15) */ or %g0,%i1,%g2
+/* 0x0068 74 (14 15) */ bl,pn %icc,.L77000134 ! tprob=0.44
+/* 0x006c (15 18) */ ldd [%g2+16],%f0
+
+! 75 ! {
+! 76 ! c=(long long)d16[2*i+2];
+! 77 ! t1+=a&0xffffffff;
+! 78 ! t=(a>>32);
+! 79 ! d=(long long)d16[2*i+3];
+! 80 ! t1+=(b&0xffff)<<16;
+
+/* 0x0070 80 (15 16) */ and %g1,%o1,%o0
+
+! 81 ! t+=(b>>16)+(t1>>32);
+! 82 ! i32[i]=t1&0xffffffff;
+! 83 ! t1=t;
+! 84 ! a=c;
+! 85 ! b=d;
+
+/* 0x0074 85 (15 16) */ add %g2,16,%g2
+/* 0x0078 80 (16 17) */ sllx %o0,16,%g3
+/* 0x007c 77 (16 17) */ and %g4,%o3,%o0
+/* 0x0080 76 (17 20) */ fdtox %f0,%f0
+/* 0x0084 (17 18) */ std %f0,[%sp+104]
+/* 0x0088 74 (17 18) */ add %o0,%g3,%o4
+/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2
+/* 0x0090 81 (18 19) */ srax %g1,16,%o0
+/* 0x0094 82 (18 19) */ and %o4,%o3,%o7
+/* 0x0098 81 (19 20) */ stx %o0,[%sp+112]
+/* 0x009c (19 20) */ srax %o4,32,%o0
+/* 0x00a0 85 (19 20) */ add %g5,4,%o5
+/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120]
+/* 0x00a8 78 (20 21) */ srax %g4,32,%o4
+/* 0x00ac 79 (20 23) */ fdtox %f2,%f0
+/* 0x00b0 (21 22) */ std %f0,[%sp+96]
+/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0
+/* 0x00b8 (23 25) */ ldx [%sp+120],%g4
+/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3
+/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4
+/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1
+/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4
+/* 0x00cc 82 (27 28) */ st %o7,[%g5]
+/* 0x00d0 (27 28) */ or %g0,1,%o7
+/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4
+ .L900000209: /* frequency 64.0 confidence 0.0 */
+/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0
+/* 0x00dc 85 (17 18) */ add %o7,1,%o7
+/* 0x00e0 (17 18) */ add %o5,4,%o5
+/* 0x00e4 (18 18) */ cmp %o7,%o2
+/* 0x00e8 (18 19) */ add %g2,16,%g2
+/* 0x00ec 76 (19 22) */ fdtox %f0,%f0
+/* 0x00f0 (20 21) */ std %f0,[%sp+104]
+/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0
+/* 0x00f8 (23 26) */ fdtox %f0,%f0
+/* 0x00fc (24 25) */ std %f0,[%sp+96]
+/* 0x0100 80 (25 26) */ and %g1,%o1,%g3
+/* 0x0104 (26 27) */ sllx %g3,16,%g3
+/* 0x0108 ( 0 0) */ stx %g3,[%sp+120]
+/* 0x010c 77 (26 27) */ and %g4,%o3,%g3
+/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128]
+/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7
+/* 0x0118 (27 27) */ add %g3,%o7,%g3
+/* 0x011c ( 0 0) */ ldx [%sp+128],%o7
+/* 0x0120 81 (28 29) */ srax %g1,16,%g1
+/* 0x0124 74 (28 28) */ add %g3,%o4,%g3
+/* 0x0128 81 (29 30) */ srax %g3,32,%o4
+/* 0x012c ( 0 0) */ stx %o4,[%sp+112]
+/* 0x0130 78 (30 31) */ srax %g4,32,%o4
+/* 0x0134 81 ( 0 0) */ ldx [%sp+112],%g4
+/* 0x0138 (30 31) */ add %g1,%g4,%g4
+/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1
+/* 0x0140 81 (31 32) */ add %o4,%g4,%o4
+/* 0x0144 82 (32 33) */ and %g3,%o3,%g3
+/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4
+/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50
+/* 0x0150 (33 34) */ st %g3,[%o5-4]
+ .L900000212: /* frequency 8.0 confidence 0.0 */
+/* 0x0154 85 ( 0 1) */ ba .L900000214 ! tprob=1.00
+/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L77000134: /* frequency 0.7 confidence 0.0 */
+ .L900000213: /* frequency 6.4 confidence 0.0 */
+/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0
+/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3
+/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0
+/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0
+/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104]
+/* 0x0170 85 ( 1 2) */ add %o7,1,%o7
+/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4
+/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2
+/* 0x017c 85 ( 2 3) */ add %g2,16,%g2
+/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4
+/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128]
+/* 0x0188 ( 4 5) */ srax %g1,16,%o0
+/* 0x018c ( 4 5) */ stx %o0,[%sp+112]
+/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3
+/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0
+/* 0x0198 ( 5 6) */ stx %o0,[%sp+120]
+/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0
+/* 0x01a0 ( 6 7) */ std %f0,[%sp+96]
+/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4
+/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7
+/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4
+/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1
+/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4
+/* 0x01b8 (11 13) */ ldx [%sp+128],%o7
+/* 0x01bc (11 12) */ add %o4,%g4,%o4
+/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0
+/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4
+/* 0x01c8 82 (13 14) */ st %g3,[%o5]
+/* 0x01cc 85 (13 14) */ add %o5,4,%o5
+/* 0x01d0 (13 14) */ cmp %o7,%o2
+/* 0x01d4 (14 15) */ or %g0,%o0,%g1
+/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86
+/* 0x01dc (14 17) */ ldd [%g2+16],%f0
+ .L77000127: /* frequency 1.0 confidence 0.0 */
+
+! 86 ! }
+! 87 ! t1+=a&0xffffffff;
+! 88 ! t=(a>>32);
+! 89 ! t1+=(b&0xffff)<<16;
+! 90 ! i32[i]=t1&0xffffffff;
+
+/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L900000214: /* frequency 1.0 confidence 0.0 */
+/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3
+/* 0x01e8 ( 0 1) */ add %g2,1023,%g2
+/* 0x01ec ( 1 2) */ srl %g3,0,%g3
+/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2
+/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4
+/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2
+/* 0x01fc ( 3 4) */ add %o4,%g4,%g4
+/* 0x0200 ( 4 5) */ add %g4,%g2,%g2
+/* 0x0204 ( 5 6) */ sll %o7,2,%g4
+/* 0x0208 ( 5 6) */ and %g2,%g3,%g2
+/* 0x020c ( 6 7) */ st %g2,[%g5+%g4]
+/* 0x0210 ( 7 9) */ ret ! Result =
+/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0
+/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2
+/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ orcc %g0,%o2,%g1
+
+! 92 !}
+! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 95 !{
+! 96 !int i;
+! 98 !#pragma pipeloop(0)
+! 99 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 99 ( 0 1) */ ble,pt %icc,.L77000140 ! tprob=0.56
+/* 0x0008 ( 0 1) */ nop
+/* 0x000c ( 1 2) */ sethi %hi(.L_const_seg_900000301),%g2
+/* 0x0010 95 ( 1 2) */ or %g0,%o1,%g4
+/* 0x0014 99 ( 2 3) */ add %g2,%lo(.L_const_seg_900000301),%g2
+/* 0x0018 ( 2 3) */ or %g0,0,%o5
+/* 0x001c 95 ( 3 4) */ or %g0,%o0,%g5
+/* 0x0020 99 ( 3 4) */ sub %o2,1,%g3
+/* 0x0024 ( 4 5) */ cmp %o2,9
+/* 0x0028 ( 4 5) */ bl,pn %icc,.L77000144 ! tprob=0.44
+/* 0x002c ( 4 7) */ ldd [%g2],%f8
+/* 0x0030 ( 5 8) */ ld [%o1],%f7
+/* 0x0034 ( 5 6) */ add %o1,16,%g4
+/* 0x0038 ( 5 6) */ sub %o2,5,%g1
+/* 0x003c ( 6 9) */ ld [%o1+4],%f5
+/* 0x0040 ( 6 7) */ or %g0,4,%o5
+/* 0x0044 ( 7 10) */ ld [%o1+8],%f3
+/* 0x0048 ( 7 8) */ fmovs %f8,%f6
+/* 0x004c ( 8 11) */ ld [%o1+12],%f1
+ .L900000305: /* frequency 64.0 confidence 0.0 */
+/* 0x0050 ( 8 16) */ ld [%g4],%f11
+/* 0x0054 ( 8 9) */ add %o5,5,%o5
+/* 0x0058 ( 8 9) */ add %g4,20,%g4
+/* 0x005c ( 8 11) */ fsubd %f6,%f8,%f6
+/* 0x0060 ( 9 10) */ std %f6,[%g5]
+/* 0x0064 ( 9 9) */ cmp %o5,%g1
+/* 0x0068 ( 9 10) */ add %g5,40,%g5
+/* 0x006c ( 0 0) */ fmovs %f8,%f4
+/* 0x0070 (10 18) */ ld [%g4-16],%f7
+/* 0x0074 (10 13) */ fsubd %f4,%f8,%f12
+/* 0x0078 ( 0 0) */ fmovs %f8,%f2
+/* 0x007c (11 12) */ std %f12,[%g5-32]
+/* 0x0080 (12 20) */ ld [%g4-12],%f5
+/* 0x0084 (12 15) */ fsubd %f2,%f8,%f12
+/* 0x0088 ( 0 0) */ fmovs %f8,%f0
+/* 0x008c (13 14) */ std %f12,[%g5-24]
+/* 0x0090 (14 22) */ ld [%g4-8],%f3
+/* 0x0094 (14 17) */ fsubd %f0,%f8,%f12
+/* 0x0098 ( 0 0) */ fmovs %f8,%f10
+/* 0x009c (15 16) */ std %f12,[%g5-16]
+/* 0x00a0 (16 24) */ ld [%g4-4],%f1
+/* 0x00a4 (16 19) */ fsubd %f10,%f8,%f10
+/* 0x00a8 ( 0 0) */ fmovs %f8,%f6
+/* 0x00ac (17 18) */ ble,pt %icc,.L900000305 ! tprob=0.50
+/* 0x00b0 (17 18) */ std %f10,[%g5-8]
+ .L900000308: /* frequency 8.0 confidence 0.0 */
+/* 0x00b4 ( 0 1) */ fmovs %f8,%f4
+/* 0x00b8 ( 0 1) */ add %g5,32,%g5
+/* 0x00bc ( 0 1) */ cmp %o5,%g3
+/* 0x00c0 ( 1 2) */ fmovs %f8,%f2
+/* 0x00c4 ( 2 3) */ fmovs %f8,%f0
+/* 0x00c8 ( 4 7) */ fsubd %f6,%f8,%f6
+/* 0x00cc ( 4 5) */ std %f6,[%g5-32]
+/* 0x00d0 ( 5 8) */ fsubd %f4,%f8,%f4
+/* 0x00d4 ( 5 6) */ std %f4,[%g5-24]
+/* 0x00d8 ( 6 9) */ fsubd %f2,%f8,%f2
+/* 0x00dc ( 6 7) */ std %f2,[%g5-16]
+/* 0x00e0 ( 7 10) */ fsubd %f0,%f8,%f0
+/* 0x00e4 ( 7 8) */ bg,pn %icc,.L77000140 ! tprob=0.14
+/* 0x00e8 ( 7 8) */ std %f0,[%g5-8]
+ .L77000144: /* frequency 0.7 confidence 0.0 */
+/* 0x00ec ( 0 3) */ ld [%g4],%f1
+ .L900000309: /* frequency 6.4 confidence 0.0 */
+/* 0x00f0 ( 0 3) */ ldd [%g2],%f8
+/* 0x00f4 ( 0 1) */ add %o5,1,%o5
+/* 0x00f8 ( 0 1) */ add %g4,4,%g4
+/* 0x00fc ( 1 2) */ cmp %o5,%g3
+/* 0x0100 ( 2 3) */ fmovs %f8,%f0
+/* 0x0104 ( 4 7) */ fsubd %f0,%f8,%f0
+/* 0x0108 ( 4 5) */ std %f0,[%g5]
+/* 0x010c ( 4 5) */ add %g5,8,%g5
+/* 0x0110 ( 4 5) */ ble,a,pt %icc,.L900000309 ! tprob=0.86
+/* 0x0114 ( 6 9) */ ld [%g4],%f1
+ .L77000140: /* frequency 1.0 confidence 0.0 */
+/* 0x0118 ( 0 2) */ retl ! Result =
+/* 0x011c ( 1 2) */ nop
+/* 0x0120 0 ( 0 0) */ .type conv_i32_to_d32,2
+/* 0x0120 ( 0 0) */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-104,%sp
+/* 0x0004 ( 1 2) */ orcc %g0,%i2,%o0
+
+! 100 !}
+! 103 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 104 !{
+! 105 !int i;
+! 106 !unsigned int a;
+! 108 !#pragma pipeloop(0)
+! 109 ! for(i=0;i<len;i++)
+
+/* 0x0008 109 ( 1 2) */ ble,pt %icc,.L77000150 ! tprob=0.56
+/* 0x000c ( 1 2) */ nop
+/* 0x0010 ( 2 3) */ sub %o0,1,%o5
+/* 0x0014 ( 2 3) */ sethi %hi(0xfc00),%g2
+
+! 110 ! {
+! 111 ! a=i32[i];
+! 112 ! d16[2*i]=(double)(a&0xffff);
+! 113 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0
+/* 0x001c ( 3 4) */ add %o5,1,%g3
+/* 0x0020 ( 4 5) */ add %g2,1023,%o4
+/* 0x0024 109 ( 4 5) */ or %g0,0,%g1
+/* 0x0028 ( 5 6) */ cmp %g3,3
+/* 0x002c ( 5 6) */ or %g0,%i1,%o7
+/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3
+/* 0x0034 ( 6 7) */ or %g0,%i0,%g2
+/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! tprob=0.44
+/* 0x003c ( 7 8) */ add %o7,4,%o0
+/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0
+/* 0x0044 113 ( 7 8) */ or %g0,1,%g1
+/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1
+/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7
+/* 0x0050 112 (10 11) */ and %o1,%o4,%o0
+ .L900000406: /* frequency 64.0 confidence 0.0 */
+/* 0x0054 112 (22 23) */ st %o0,[%sp+96]
+/* 0x0058 113 (22 23) */ add %g1,1,%g1
+/* 0x005c (22 23) */ add %g2,16,%g2
+/* 0x0060 (23 23) */ cmp %g1,%o5
+/* 0x0064 (23 24) */ add %o7,4,%o7
+/* 0x0068 112 (29 31) */ ld [%sp+96],%f3
+/* 0x006c ( 0 0) */ fmovs %f0,%f2
+/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2
+/* 0x0074 113 (32 33) */ srl %o1,16,%o0
+/* 0x0078 112 (32 33) */ std %f2,[%g2-16]
+/* 0x007c 113 (33 34) */ st %o0,[%sp+92]
+/* 0x0080 (40 42) */ ld [%sp+92],%f3
+/* 0x0084 111 (41 43) */ ld [%o7-4],%o1
+/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2
+/* 0x008c (42 45) */ fsubd %f2,%f0,%f2
+/* 0x0090 112 (43 44) */ and %o1,%o4,%o0
+/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50
+/* 0x0098 (43 44) */ std %f2,[%g2-8]
+ .L900000409: /* frequency 8.0 confidence 0.0 */
+/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96]
+/* 0x00a0 ( 0 1) */ fmovs %f0,%f2
+/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2
+/* 0x00a8 ( 1 2) */ srl %o1,16,%o0
+/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3
+/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2
+/* 0x00b4 ( 6 7) */ std %f2,[%g2-16]
+/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92]
+/* 0x00bc (10 11) */ fmovs %f0,%f2
+/* 0x00c0 (11 14) */ ld [%sp+92],%f3
+/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0
+/* 0x00c8 (13 14) */ std %f0,[%g2-8]
+/* 0x00cc (14 16) */ ret ! Result =
+/* 0x00d0 (16 17) */ restore %g0,%g0,%g0
+ .L77000154: /* frequency 0.7 confidence 0.0 */
+/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0
+ .L900000410: /* frequency 6.4 confidence 0.0 */
+/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1
+/* 0x00dc ( 0 1) */ st %o1,[%sp+96]
+/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1
+/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0
+/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0
+/* 0x00ec ( 1 2) */ add %o7,4,%o7
+/* 0x00f0 ( 2 3) */ cmp %g1,%o5
+/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2
+/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3
+/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2
+/* 0x0100 ( 6 7) */ std %f2,[%g2]
+/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92]
+/* 0x0108 (10 11) */ fmovs %f0,%f2
+/* 0x010c (11 14) */ ld [%sp+92],%f3
+/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0
+/* 0x0114 (13 14) */ std %f0,[%g2+8]
+/* 0x0118 (13 14) */ add %g2,16,%g2
+/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86
+/* 0x0120 (14 17) */ ld [%o7],%o0
+ .L77000150: /* frequency 1.0 confidence 0.0 */
+/* 0x0124 ( 0 2) */ ret ! Result =
+/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2
+/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-104,%sp
+/* 0x0004 ( 1 2) */ or %g0,%i3,%i4
+/* 0x0008 ( 1 2) */ or %g0,%i2,%g1
+
+! 114 ! }
+! 115 !}
+! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! 119 ! double * /* 0 */,
+! 120 ! double * /*result16*/, double * /* result32 */,
+! 121 ! float * /*source - should be unsigned int*
+! 122 ! converted to float* */);
+! 126 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 127 ! unsigned int *i32, int len)
+! 128 !{
+! 129 !int i;
+! 130 !unsigned int a;
+! 132 !#pragma pipeloop(0)
+! 133 ! for(i=0;i<len-3;i+=4)
+
+/* 0x000c 133 ( 2 3) */ sub %i4,3,%g2
+/* 0x0010 ( 2 3) */ or %g0,0,%o7
+/* 0x0014 ( 3 4) */ cmp %g2,0
+/* 0x0018 128 ( 3 4) */ or %g0,%i0,%i3
+/* 0x001c 133 ( 3 4) */ ble,pt %icc,.L900000515 ! tprob=0.56
+/* 0x0020 ( 4 5) */ cmp %o7,%i4
+
+! 134 ! {
+! 135 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 136 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0024 136 ( 4 5) */ sethi %hi(Zero),%g2
+/* 0x0028 133 ( 5 6) */ or %g0,%g1,%o3
+/* 0x002c ( 5 6) */ sub %i4,4,%o2
+/* 0x0030 136 ( 6 7) */ add %g2,%lo(Zero),%o1
+/* 0x0034 133 ( 6 7) */ or %g0,0,%o5
+/* 0x0038 ( 7 8) */ or %g0,0,%o4
+/* 0x003c 136 ( 7 8) */ or %g0,%o3,%g4
+ .L900000514: /* frequency 6.4 confidence 0.0 */
+/* 0x0040 ( 0 3) */ ldd [%o1],%f2
+/* 0x0044 136 ( 0 1) */ add %i3,%o5,%g2
+/* 0x0048 ( 0 1) */ add %i1,%o4,%g3
+/* 0x004c ( 1 4) */ ldd [%o1-8],%f0
+/* 0x0050 ( 1 2) */ add %o7,4,%o7
+/* 0x0054 ( 1 2) */ add %o3,16,%o3
+/* 0x0058 ( 2 3) */ fmovd %f2,%f14
+/* 0x005c ( 2 5) */ ld [%g4],%f15
+/* 0x0060 ( 2 3) */ cmp %o7,%o2
+/* 0x0064 ( 3 4) */ fmovd %f2,%f10
+/* 0x0068 ( 3 6) */ ld [%g4+4],%f11
+/* 0x006c ( 4 5) */ fmovd %f2,%f6
+/* 0x0070 ( 4 7) */ ld [%g4+8],%f7
+/* 0x0074 ( 5 8) */ ld [%g4+12],%f3
+/* 0x0078 ( 5 8) */ fxtod %f14,%f14
+/* 0x007c ( 6 9) */ fxtod %f10,%f10
+/* 0x0080 ( 6 9) */ ldd [%o1-16],%f16
+/* 0x0084 ( 7 10) */ fxtod %f6,%f6
+/* 0x0088 ( 7 8) */ std %f14,[%i3+%o5]
+/* 0x008c ( 7 8) */ add %o5,32,%o5
+/* 0x0090 ( 8 11) */ fxtod %f2,%f2
+/* 0x0094 ( 8 11) */ fmuld %f0,%f14,%f12
+/* 0x0098 ( 8 9) */ std %f10,[%g2+8]
+/* 0x009c ( 9 12) */ fmuld %f0,%f10,%f8
+/* 0x00a0 ( 9 10) */ std %f6,[%g2+16]
+/* 0x00a4 (10 13) */ fmuld %f0,%f6,%f4
+/* 0x00a8 (10 11) */ std %f2,[%g2+24]
+/* 0x00ac (11 14) */ fmuld %f0,%f2,%f0
+/* 0x00b0 (11 14) */ fdtox %f12,%f12
+/* 0x00b4 (12 15) */ fdtox %f8,%f8
+/* 0x00b8 (13 16) */ fdtox %f4,%f4
+/* 0x00bc (14 17) */ fdtox %f0,%f0
+/* 0x00c0 (15 18) */ fxtod %f12,%f12
+/* 0x00c4 (15 16) */ std %f12,[%g3+8]
+/* 0x00c8 (16 19) */ fxtod %f8,%f8
+/* 0x00cc (16 17) */ std %f8,[%g3+24]
+/* 0x00d0 (17 20) */ fxtod %f4,%f4
+/* 0x00d4 (17 18) */ std %f4,[%g3+40]
+/* 0x00d8 (18 21) */ fxtod %f0,%f0
+/* 0x00dc (18 21) */ fmuld %f12,%f16,%f12
+/* 0x00e0 (18 19) */ std %f0,[%g3+56]
+/* 0x00e4 (19 22) */ fmuld %f8,%f16,%f8
+/* 0x00e8 (20 23) */ fmuld %f4,%f16,%f4
+/* 0x00ec (21 24) */ fmuld %f0,%f16,%f0
+/* 0x00f0 (21 24) */ fsubd %f14,%f12,%f12
+/* 0x00f4 (21 22) */ std %f12,[%i1+%o4]
+/* 0x00f8 (22 25) */ fsubd %f10,%f8,%f8
+/* 0x00fc (22 23) */ std %f8,[%g3+16]
+/* 0x0100 (22 23) */ add %o4,64,%o4
+/* 0x0104 (23 26) */ fsubd %f6,%f4,%f4
+/* 0x0108 (23 24) */ std %f4,[%g3+32]
+/* 0x010c (24 27) */ fsubd %f2,%f0,%f0
+/* 0x0110 (24 25) */ std %f0,[%g3+48]
+/* 0x0114 (24 25) */ ble,pt %icc,.L900000514 ! tprob=0.86
+/* 0x0118 (25 26) */ or %g0,%o3,%g4
+ .L77000159: /* frequency 1.0 confidence 0.0 */
+
+! 137 ! }
+! 138 ! for(;i<len;i++)
+
+/* 0x011c 138 ( 0 1) */ cmp %o7,%i4
+ .L900000515: /* frequency 1.0 confidence 0.0 */
+/* 0x0120 138 ( 0 1) */ bge,pt %icc,.L77000164 ! tprob=0.56
+/* 0x0124 ( 0 1) */ nop
+
+! 139 ! {
+! 140 ! a=i32[i];
+! 141 ! d32[i]=(double)(i32[i]);
+! 142 ! d16[2*i]=(double)(a&0xffff);
+! 143 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1
+/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0
+/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0
+/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3
+/* 0x0138 ( 2 3) */ sll %o7,2,%g2
+/* 0x013c ( 2 3) */ add %o0,1023,%o3
+/* 0x0140 ( 3 4) */ sll %o7,3,%g4
+/* 0x0144 ( 3 4) */ cmp %g3,3
+/* 0x0148 ( 4 5) */ add %g1,%g2,%o0
+/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2
+/* 0x0150 ( 5 6) */ add %i3,%g4,%o4
+/* 0x0154 ( 5 6) */ sub %i4,1,%o1
+/* 0x0158 ( 6 7) */ sll %o7,4,%g5
+/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44
+/* 0x0160 ( 7 8) */ add %i1,%g5,%o5
+/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3
+/* 0x0168 143 ( 7 8) */ add %o4,8,%o4
+/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1
+/* 0x0170 143 ( 8 9) */ add %o5,16,%o5
+/* 0x0174 ( 8 9) */ add %o7,1,%o7
+/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2
+/* 0x017c 143 ( 9 10) */ add %o0,4,%o0
+/* 0x0180 142 (10 11) */ and %g1,%o3,%g2
+/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2
+/* 0x0188 (11 12) */ std %f2,[%o4-8]
+/* 0x018c 143 (11 12) */ srl %g1,16,%g1
+/* 0x0190 142 (12 13) */ st %g2,[%sp+96]
+/* 0x0194 (15 16) */ fmovs %f0,%f2
+/* 0x0198 (16 19) */ ld [%sp+96],%f3
+/* 0x019c (18 21) */ fsubd %f2,%f0,%f2
+/* 0x01a0 (18 19) */ std %f2,[%o5-16]
+/* 0x01a4 143 (19 20) */ st %g1,[%sp+92]
+/* 0x01a8 (22 23) */ fmovs %f0,%f2
+/* 0x01ac (23 26) */ ld [%sp+92],%f3
+/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2
+/* 0x01b4 (25 26) */ std %f2,[%o5-8]
+ .L900000509: /* frequency 64.0 confidence 0.0 */
+/* 0x01b8 141 (26 28) */ ld [%o0],%f3
+/* 0x01bc 143 (26 27) */ add %o7,2,%o7
+/* 0x01c0 (26 27) */ add %o5,32,%o5
+/* 0x01c4 140 (27 29) */ ld [%o0],%g1
+/* 0x01c8 143 (27 27) */ cmp %o7,%o1
+/* 0x01cc (27 28) */ add %o4,16,%o4
+/* 0x01d0 141 ( 0 0) */ fmovs %f0,%f2
+/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2
+/* 0x01d8 (29 30) */ std %f2,[%o4-16]
+/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2
+/* 0x01e0 (30 31) */ st %g2,[%sp+96]
+/* 0x01e4 (37 39) */ ld [%sp+96],%f3
+/* 0x01e8 ( 0 0) */ fmovs %f0,%f2
+/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2
+/* 0x01f0 143 (40 41) */ srl %g1,16,%g1
+/* 0x01f4 142 (40 41) */ std %f2,[%o5-32]
+/* 0x01f8 143 (41 42) */ st %g1,[%sp+92]
+/* 0x01fc (48 50) */ ld [%sp+92],%f3
+/* 0x0200 ( 0 0) */ fmovs %f0,%f2
+/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2
+/* 0x0208 (51 52) */ std %f2,[%o5-24]
+/* 0x020c (51 52) */ add %o0,4,%o0
+/* 0x0210 141 (52 54) */ ld [%o0],%f3
+/* 0x0214 140 (53 55) */ ld [%o0],%g1
+/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2
+/* 0x021c (54 57) */ fsubd %f2,%f0,%f2
+/* 0x0220 (55 56) */ std %f2,[%o4-8]
+/* 0x0224 142 (55 56) */ and %g1,%o3,%g2
+/* 0x0228 (56 57) */ st %g2,[%sp+96]
+/* 0x022c (63 65) */ ld [%sp+96],%f3
+/* 0x0230 ( 0 0) */ fmovs %f0,%f2
+/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2
+/* 0x0238 143 (66 67) */ srl %g1,16,%g1
+/* 0x023c 142 (66 67) */ std %f2,[%o5-16]
+/* 0x0240 143 (67 68) */ st %g1,[%sp+92]
+/* 0x0244 (74 76) */ ld [%sp+92],%f3
+/* 0x0248 ( 0 0) */ fmovs %f0,%f2
+/* 0x024c (76 79) */ fsubd %f2,%f0,%f2
+/* 0x0250 (77 78) */ std %f2,[%o5-8]
+/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50
+/* 0x0258 (77 78) */ add %o0,4,%o0
+ .L900000512: /* frequency 8.0 confidence 0.0 */
+/* 0x025c 143 ( 0 1) */ cmp %o7,%i4
+/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! tprob=0.14
+/* 0x0264 ( 0 1) */ nop
+ .L77000161: /* frequency 0.7 confidence 0.0 */
+/* 0x0268 141 ( 0 3) */ ld [%o0],%f3
+ .L900000513: /* frequency 6.4 confidence 0.0 */
+/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0
+/* 0x0270 143 ( 0 1) */ add %o7,1,%o7
+/* 0x0274 140 ( 1 4) */ ld [%o0],%o1
+/* 0x0278 143 ( 1 2) */ add %o0,4,%o0
+/* 0x027c ( 1 2) */ cmp %o7,%i4
+/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2
+/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1
+/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2
+/* 0x028c ( 4 5) */ std %f2,[%o4]
+/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1
+/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96]
+/* 0x0298 143 ( 5 6) */ add %o4,8,%o4
+/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2
+/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3
+/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2
+/* 0x02a8 (11 12) */ std %f2,[%o5]
+/* 0x02ac 143 (12 13) */ st %o1,[%sp+92]
+/* 0x02b0 (15 16) */ fmovs %f0,%f2
+/* 0x02b4 (16 19) */ ld [%sp+92],%f3
+/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0
+/* 0x02bc (18 19) */ std %f0,[%o5+8]
+/* 0x02c0 (18 19) */ add %o5,16,%o5
+/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86
+/* 0x02c8 (19 22) */ ld [%o0],%f3
+ .L77000164: /* frequency 1.0 confidence 0.0 */
+/* 0x02cc ( 0 2) */ ret ! Result =
+/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2
+/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE adjust_montf_result
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global adjust_montf_result
+ adjust_montf_result: /* frequency 1.0 confidence 0.0 */
+
+! 144 ! }
+! 145 !}
+! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 149 !{
+! 150 !long long acc;
+! 151 !int i;
+! 153 ! if(i32[len]>0) i=-1;
+
+/* 000000 153 ( 0 1) */ sll %o2,2,%g1
+/* 0x0004 ( 0 1) */ or %g0,-1,%g3
+/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1
+/* 0x000c ( 3 4) */ cmp %g1,0
+/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50
+/* 0x0014 ( 3 4) */ or %g0,%o1,%o3
+/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00
+/* 0x001c ( 4 5) */ cmp %g3,0
+ .L77000175: /* frequency 0.8 confidence 0.0 */
+
+! 154 ! else
+! 155 ! {
+! 156 ! for(i=len-1; i>=0; i++)
+
+/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3
+/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60
+/* 0x0028 ( 1 2) */ cmp %g3,0
+/* 0x002c ( 1 2) */ sll %g3,2,%g1
+/* 0x0030 ( 2 3) */ add %o0,%g1,%g2
+/* 0x0034 ( 2 3) */ add %o1,%g1,%g1
+
+! 157 ! {
+! 158 ! if(i32[i]!=nint[i]) break;
+
+/* 0x0038 158 ( 3 6) */ ld [%g1],%g5
+ .L900000610: /* frequency 5.3 confidence 0.0 */
+/* 0x003c 158 ( 0 3) */ ld [%g2],%o5
+/* 0x0040 ( 0 1) */ add %g1,4,%g1
+/* 0x0044 ( 0 1) */ add %g2,4,%g2
+/* 0x0048 ( 2 3) */ cmp %o5,%g5
+/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16
+/* 0x0050 ( 2 3) */ nop
+/* 0x0054 ( 3 4) */ addcc %g3,1,%g3
+/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84
+/* 0x005c ( 3 6) */ ld [%g1],%g5
+ .L77000182: /* frequency 1.0 confidence 0.0 */
+
+! 159 ! }
+! 160 ! }
+! 161 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0060 161 ( 0 1) */ cmp %g3,0
+ .L900000611: /* frequency 1.0 confidence 0.0 */
+/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50
+/* 0x0068 ( 0 1) */ sll %g3,2,%g2
+/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1
+/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2
+/* 0x0074 ( 4 5) */ cmp %g2,%g1
+/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56
+/* 0x007c ( 4 5) */ nop
+ .L77000198: /* frequency 0.8 confidence 0.0 */
+
+! 162 ! {
+! 163 ! acc=0;
+! 164 ! for(i=0;i<len;i++)
+
+/* 0x0080 164 ( 0 1) */ cmp %o2,0
+/* 0x0084 ( 0 1) */ ble,pt %icc,.L77000191 ! tprob=0.60
+/* 0x0088 ( 0 1) */ nop
+/* 0x008c 161 ( 1 2) */ or %g0,-1,%g2
+/* 0x0090 ( 1 2) */ sub %o2,1,%g4
+/* 0x0094 ( 2 3) */ srl %g2,0,%g3
+/* 0x0098 163 ( 2 3) */ or %g0,0,%g5
+/* 0x009c 164 ( 3 4) */ or %g0,0,%o5
+/* 0x00a0 161 ( 3 4) */ or %g0,%o0,%o4
+/* 0x00a4 ( 4 5) */ cmp %o2,3
+/* 0x00a8 ( 4 5) */ add %o1,4,%g2
+/* 0x00ac 164 ( 4 5) */ bl,pn %icc,.L77000199 ! tprob=0.40
+/* 0x00b0 ( 5 6) */ add %o0,8,%g1
+
+! 165 ! {
+! 166 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b4 166 ( 5 8) */ ld [%o0],%o2
+/* 0x00b8 0 ( 5 6) */ or %g0,%g2,%o3
+/* 0x00bc 166 ( 6 9) */ ld [%o1],%o1
+/* 0x00c0 0 ( 6 7) */ or %g0,%g1,%o4
+
+! 167 ! i32[i]=acc&0xffffffff;
+! 168 ! acc=acc>>32;
+
+/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5
+/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1
+/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2
+/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5
+/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2
+/* 0x00d8 ( 9 10) */ st %o2,[%o0]
+/* 0x00dc 168 (10 11) */ srax %g5,32,%g5
+ .L900000605: /* frequency 64.0 confidence 0.0 */
+/* 0x00e0 166 (12 20) */ ld [%o3],%o2
+/* 0x00e4 168 (12 13) */ add %o5,1,%o5
+/* 0x00e8 (12 13) */ add %o3,4,%o3
+/* 0x00ec (13 13) */ cmp %o5,%g4
+/* 0x00f0 (13 14) */ add %o4,4,%o4
+/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1
+/* 0x00f8 (15 15) */ add %g1,%g5,%g5
+/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2
+/* 0x0100 166 (16 24) */ ld [%o4-4],%g1
+/* 0x0104 167 (17 18) */ st %o2,[%o4-8]
+/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50
+/* 0x010c (17 18) */ srax %g5,32,%g5
+ .L900000608: /* frequency 8.0 confidence 0.0 */
+/* 0x0110 166 ( 0 3) */ ld [%o3],%g2
+/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1
+/* 0x0118 ( 3 4) */ add %g1,%g5,%g1
+/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2
+/* 0x0120 ( 5 7) */ retl ! Result =
+/* 0x0124 ( 6 7) */ st %g2,[%o4-4]
+ .L77000199: /* frequency 0.6 confidence 0.0 */
+/* 0x0128 166 ( 0 3) */ ld [%o4],%g1
+ .L900000609: /* frequency 5.3 confidence 0.0 */
+/* 0x012c 166 ( 0 3) */ ld [%o3],%g2
+/* 0x0130 ( 0 1) */ add %g5,%g1,%g1
+/* 0x0134 168 ( 0 1) */ add %o5,1,%o5
+/* 0x0138 ( 1 2) */ add %o3,4,%o3
+/* 0x013c ( 1 2) */ cmp %o5,%g4
+/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1
+/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2
+/* 0x0148 ( 3 4) */ st %g2,[%o4]
+/* 0x014c 168 ( 3 4) */ add %o4,4,%o4
+/* 0x0150 ( 4 5) */ srax %g1,32,%g5
+/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84
+/* 0x0158 ( 4 7) */ ld [%o4],%g1
+ .L77000191: /* frequency 1.0 confidence 0.0 */
+/* 0x015c ( 0 2) */ retl ! Result =
+/* 0x0160 ( 1 2) */ nop
+/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2
+/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 32
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-144,%sp
+/* 0x0004 ( 1 2) */ st %i0,[%fp+68]
+
+! 169 ! }
+! 170 ! }
+! 171 !}
+! 175 !void cleanup(double *dt, int from, int tlen);
+! 177 !/*
+! 178 !** the lengths of the input arrays should be at least the following:
+! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 180 !** all of them should be different from one another
+! 181 !**
+! 182 !*/
+! 183 !void mont_mulf_noconv(unsigned int *result,
+! 184 ! double *dm1, double *dm2, double *dt,
+! 185 ! double *dn, unsigned int *nint,
+! 186 ! int nlen, double dn0)
+! 187 !{
+! 188 ! int i, j, jj;
+! 189 ! int tmp;
+! 190 ! double digit, m2j, nextm2j, a, b;
+! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 193 ! pdm1=&(dm1[0]);
+! 194 ! pdm2=&(dm2[0]);
+! 195 ! pdn=&(dn[0]);
+! 196 ! pdm2[2*nlen]=Zero;
+
+/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2
+/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1
+/* 0x0010 ( 2 3) */ st %i5,[%fp+88]
+/* 0x0014 ( 2 3) */ or %g0,%i3,%o2
+/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4
+/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2
+/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5
+/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0
+/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2
+
+! 198 ! if (nlen!=16)
+! 199 ! {
+! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 202 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 205 ! pdtj=&(dt[0]);
+! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 207 ! {
+! 208 ! m2j=pdm2[j];
+! 209 ! a=pdtj[0]+pdn[0]*digit;
+! 210 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 211 ! pdtj[1]=b;
+! 213 !#pragma pipeloop(0)
+! 214 ! for(i=1;i<nlen;i++)
+! 215 ! {
+! 216 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 217 ! }
+! 218 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 219 !
+! 220 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 221 ! }
+! 222 ! }
+! 223 ! else
+! 224 ! {
+! 225 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 227 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 228 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 229 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 230 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 231 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 232 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 233 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 234 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 235 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 236 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 237 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 239 ! pdn_0=pdn[0];
+! 240 ! pdm1_0=pdm1[0];
+! 242 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 243 ! pdtj=&(dt[0]);
+! 245 ! for(j=0;j<32;j++,pdtj++)
+! 246 ! {
+! 248 ! m2j=pdm2[j];
+! 249 ! a=pdtj[0]+pdn_0*digit;
+! 250 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+! 251 ! pdtj[1]=b;
+! 253 ! /**** this loop will be fully unrolled:
+! 254 ! for(i=1;i<16;i++)
+! 255 ! {
+! 256 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 257 ! }
+! 258 ! *************************************/
+! 259 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 260 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 261 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 262 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 263 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 264 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 265 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 266 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 267 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 268 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 269 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 270 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 271 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 272 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 273 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 274 !		/* no need for cleanup, cannot overflow */
+! 275 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 276 ! }
+! 277 ! }
+! 279 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+! 281 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x002c 281 ( 4 5) */ or %g0,1,%o4
+/* 0x0030 187 ( 6 9) */ ldd [%fp+96],%f0
+/* 0x0034 196 ( 7 10) */ ld [%fp+92],%o0
+/* 0x0038 187 ( 8 9) */ fmovd %f0,%f16
+/* 0x003c 196 ( 9 10) */ sll %o0,4,%g2
+/* 0x0040 ( 9 10) */ or %g0,%o0,%g1
+/* 0x0044 198 (10 11) */ cmp %o0,16
+/* 0x0048 (10 11) */ be,pn %icc,.L77000289 ! tprob=0.50
+/* 0x004c (10 11) */ std %f2,[%o1+%g2]
+/* 0x0050 200 (11 12) */ sll %o0,2,%g2
+/* 0x0054 (11 14) */ ldd [%g4],%f2
+/* 0x0058 (12 13) */ add %g2,2,%o1
+/* 0x005c (12 13) */ add %g2,1,%o3
+/* 0x0060 196 (13 14) */ sll %o0,1,%o7
+/* 0x0064 200 (13 14) */ cmp %o1,0
+/* 0x0068 (13 14) */ ble,a,pt %icc,.L900000755 ! tprob=0.55
+/* 0x006c (14 17) */ ldd [%i1],%f0
+/* 0x0070 (14 15) */ cmp %o1,3
+/* 0x0074 281 (14 15) */ or %g0,1,%o1
+/* 0x0078 (14 15) */ bl,pn %icc,.L77000279 ! tprob=0.40
+/* 0x007c (15 16) */ add %o2,8,%o0
+/* 0x0080 (15 16) */ std %f2,[%g5]
+/* 0x0084 0 (16 17) */ or %g0,%o0,%o2
+ .L900000726: /* frequency 64.0 confidence 0.0 */
+/* 0x0088 ( 3 5) */ ldd [%g4],%f0
+/* 0x008c ( 3 4) */ add %o4,1,%o4
+/* 0x0090 ( 3 4) */ add %o2,8,%o2
+/* 0x0094 ( 4 4) */ cmp %o4,%o3
+/* 0x0098 ( 5 6) */ ble,pt %icc,.L900000726 ! tprob=0.50
+/* 0x009c ( 5 6) */ std %f0,[%o2-8]
+ .L900000729: /* frequency 8.0 confidence 0.0 */
+/* 0x00a0 ( 0 1) */ ba .L900000755 ! tprob=1.00
+/* 0x00a4 ( 0 3) */ ldd [%i1],%f0
+ .L77000279: /* frequency 0.6 confidence 0.0 */
+/* 0x00a8 ( 0 1) */ std %f2,[%o2]
+ .L900000754: /* frequency 5.3 confidence 0.0 */
+/* 0x00ac ( 0 3) */ ldd [%g4],%f2
+/* 0x00b0 ( 0 1) */ cmp %o1,%o3
+/* 0x00b4 ( 0 1) */ add %o2,8,%o2
+/* 0x00b8 ( 1 2) */ add %o1,1,%o1
+/* 0x00bc ( 1 2) */ ble,a,pt %icc,.L900000754 ! tprob=0.87
+/* 0x00c0 ( 3 4) */ std %f2,[%o2]
+ .L77000284: /* frequency 0.8 confidence 0.0 */
+/* 0x00c4 202 ( 0 3) */ ldd [%i1],%f0
+ .L900000755: /* frequency 0.8 confidence 0.0 */
+/* 0x00c8 202 ( 0 3) */ ldd [%i0],%f2
+/* 0x00cc ( 0 1) */ add %o7,1,%o2
+/* 0x00d0 206 ( 0 1) */ cmp %o7,0
+/* 0x00d4 ( 1 2) */ sll %o2,1,%o0
+/* 0x00d8 ( 1 2) */ sub %o7,1,%o1
+/* 0x00dc 202 ( 2 5) */ fmuld %f0,%f2,%f0
+/* 0x00e0 ( 2 3) */ std %f0,[%g5]
+/* 0x00e4 ( 2 3) */ sub %g1,1,%o7
+/* 0x00e8 ( 3 6) */ ldd [%g4],%f6
+/* 0x00ec 0 ( 3 4) */ or %g0,%o7,%g3
+/* 0x00f0 ( 3 4) */ or %g0,0,%l0
+/* 0x00f4 ( 4 7) */ ldd [%g4-8],%f2
+/* 0x00f8 ( 4 5) */ or %g0,0,%i5
+/* 0x00fc ( 4 5) */ or %g0,%o1,%o5
+/* 0x0100 ( 5 8) */ fdtox %f0,%f0
+/* 0x0104 ( 5 8) */ ldd [%g4-16],%f4
+/* 0x0108 ( 5 6) */ or %g0,%o0,%o3
+/* 0x010c 210 ( 6 7) */ add %i0,8,%o4
+/* 0x0110 ( 6 7) */ or %g0,0,%i4
+/* 0x0114 ( 9 10) */ fmovs %f6,%f0
+/* 0x0118 (11 14) */ fxtod %f0,%f0
+/* 0x011c 203 (14 17) */ fmuld %f0,%f16,%f0
+/* 0x0120 (17 20) */ fmuld %f0,%f2,%f2
+/* 0x0124 (20 23) */ fdtox %f2,%f2
+/* 0x0128 (23 26) */ fxtod %f2,%f2
+/* 0x012c (26 29) */ fmuld %f2,%f4,%f2
+/* 0x0130 (29 32) */ fsubd %f0,%f2,%f22
+/* 0x0134 206 (29 30) */ ble,pt %icc,.L900000748 ! tprob=0.60
+/* 0x0138 (29 30) */ sll %g1,4,%g2
+/* 0x013c 210 (30 33) */ ldd [%i2],%f0
+ .L900000749: /* frequency 5.3 confidence 0.0 */
+/* 0x0140 210 ( 0 3) */ fmuld %f0,%f22,%f8
+/* 0x0144 ( 0 3) */ ldd [%i1],%f0
+/* 0x0148 214 ( 0 1) */ cmp %g1,1
+/* 0x014c 210 ( 1 4) */ ldd [%o4+%i4],%f6
+/* 0x0150 ( 1 2) */ add %i1,8,%o0
+/* 0x0154 214 ( 1 2) */ or %g0,1,%o1
+/* 0x0158 210 ( 2 5) */ ldd [%i3],%f2
+/* 0x015c ( 2 3) */ add %i3,16,%l1
+/* 0x0160 ( 3 6) */ fmuld %f0,%f6,%f6
+/* 0x0164 ( 3 6) */ ldd [%g4-8],%f4
+/* 0x0168 ( 4 7) */ faddd %f2,%f8,%f2
+/* 0x016c ( 4 7) */ ldd [%i3+8],%f0
+/* 0x0170 208 ( 5 8) */ ldd [%i0+%i4],%f20
+/* 0x0174 210 ( 6 9) */ faddd %f0,%f6,%f0
+/* 0x0178 ( 7 10) */ fmuld %f2,%f4,%f2
+/* 0x017c (10 13) */ faddd %f0,%f2,%f18
+/* 0x0180 211 (10 11) */ std %f18,[%i3+8]
+/* 0x0184 214 (10 11) */ ble,pt %icc,.L900000753 ! tprob=0.54
+/* 0x0188 (11 12) */ srl %i5,31,%g2
+/* 0x018c (11 12) */ cmp %g3,7
+/* 0x0190 210 (12 13) */ add %i2,8,%g2
+/* 0x0194 214 (12 13) */ bl,pn %icc,.L77000281 ! tprob=0.36
+/* 0x0198 (13 14) */ add %g2,24,%o2
+/* 0x019c 216 (13 16) */ ldd [%o0+16],%f14
+/* 0x01a0 (13 14) */ add %i3,48,%l1
+/* 0x01a4 (14 17) */ ldd [%o0+24],%f12
+/* 0x01a8 0 (14 15) */ or %g0,%o2,%g2
+/* 0x01ac 214 (14 15) */ sub %g1,3,%o2
+/* 0x01b0 216 (15 18) */ ldd [%o0],%f2
+/* 0x01b4 (15 16) */ or %g0,5,%o1
+/* 0x01b8 (16 19) */ ldd [%g2-24],%f0
+/* 0x01bc (17 20) */ ldd [%o0+8],%f6
+/* 0x01c0 (17 20) */ fmuld %f2,%f20,%f2
+/* 0x01c4 (17 18) */ add %o0,32,%o0
+/* 0x01c8 (18 21) */ ldd [%g2-16],%f8
+/* 0x01cc (18 21) */ fmuld %f0,%f22,%f4
+/* 0x01d0 (19 22) */ ldd [%i3+16],%f0
+/* 0x01d4 (19 22) */ fmuld %f6,%f20,%f10
+/* 0x01d8 (20 23) */ ldd [%g2-8],%f6
+/* 0x01dc (21 24) */ faddd %f2,%f4,%f4
+/* 0x01e0 (21 24) */ ldd [%i3+32],%f2
+ .L900000738: /* frequency 512.0 confidence 0.0 */
+/* 0x01e4 216 (16 24) */ ldd [%g2],%f24
+/* 0x01e8 (16 17) */ add %o1,3,%o1
+/* 0x01ec (16 17) */ add %g2,24,%g2
+/* 0x01f0 (16 19) */ fmuld %f8,%f22,%f8
+/* 0x01f4 (17 25) */ ldd [%l1],%f28
+/* 0x01f8 (17 17) */ cmp %o1,%o2
+/* 0x01fc (17 18) */ add %o0,24,%o0
+/* 0x0200 (18 26) */ ldd [%o0-24],%f26
+/* 0x0204 (18 21) */ faddd %f0,%f4,%f0
+/* 0x0208 (18 19) */ add %l1,48,%l1
+/* 0x020c (19 22) */ faddd %f10,%f8,%f10
+/* 0x0210 (19 22) */ fmuld %f14,%f20,%f4
+/* 0x0214 (19 20) */ std %f0,[%l1-80]
+/* 0x0218 (20 28) */ ldd [%g2-16],%f8
+/* 0x021c (20 23) */ fmuld %f6,%f22,%f6
+/* 0x0220 (21 29) */ ldd [%l1-32],%f0
+/* 0x0224 (22 30) */ ldd [%o0-16],%f14
+/* 0x0228 (22 25) */ faddd %f2,%f10,%f2
+/* 0x022c (23 26) */ faddd %f4,%f6,%f10
+/* 0x0230 (23 26) */ fmuld %f12,%f20,%f4
+/* 0x0234 (23 24) */ std %f2,[%l1-64]
+/* 0x0238 (24 32) */ ldd [%g2-8],%f6
+/* 0x023c (24 27) */ fmuld %f24,%f22,%f24
+/* 0x0240 (25 33) */ ldd [%l1-16],%f2
+/* 0x0244 (26 34) */ ldd [%o0-8],%f12
+/* 0x0248 (26 29) */ faddd %f28,%f10,%f10
+/* 0x024c (27 28) */ std %f10,[%l1-48]
+/* 0x0250 (27 30) */ fmuld %f26,%f20,%f10
+/* 0x0254 (27 28) */ ble,pt %icc,.L900000738 ! tprob=0.50
+/* 0x0258 (27 30) */ faddd %f4,%f24,%f4
+ .L900000741: /* frequency 64.0 confidence 0.0 */
+/* 0x025c 216 ( 0 3) */ fmuld %f8,%f22,%f28
+/* 0x0260 ( 0 3) */ ldd [%g2],%f24
+/* 0x0264 ( 0 3) */ faddd %f0,%f4,%f26
+/* 0x0268 ( 1 4) */ fmuld %f12,%f20,%f8
+/* 0x026c ( 1 2) */ add %l1,32,%l1
+/* 0x0270 ( 1 2) */ cmp %o1,%g3
+/* 0x0274 ( 2 5) */ fmuld %f14,%f20,%f14
+/* 0x0278 ( 2 5) */ ldd [%l1-32],%f4
+/* 0x027c ( 2 3) */ add %g2,8,%g2
+/* 0x0280 ( 3 6) */ faddd %f10,%f28,%f12
+/* 0x0284 ( 3 6) */ fmuld %f6,%f22,%f6
+/* 0x0288 ( 3 6) */ ldd [%l1-16],%f0
+/* 0x028c ( 4 7) */ fmuld %f24,%f22,%f10
+/* 0x0290 ( 4 5) */ std %f26,[%l1-64]
+/* 0x0294 ( 6 9) */ faddd %f2,%f12,%f2
+/* 0x0298 ( 6 7) */ std %f2,[%l1-48]
+/* 0x029c ( 7 10) */ faddd %f14,%f6,%f6
+/* 0x02a0 ( 8 11) */ faddd %f8,%f10,%f2
+/* 0x02a4 (10 13) */ faddd %f4,%f6,%f4
+/* 0x02a8 (10 11) */ std %f4,[%l1-32]
+/* 0x02ac (11 14) */ faddd %f0,%f2,%f0
+/* 0x02b0 (11 12) */ bg,pn %icc,.L77000213 ! tprob=0.13
+/* 0x02b4 (11 12) */ std %f0,[%l1-16]
+ .L77000281: /* frequency 4.0 confidence 0.0 */
+/* 0x02b8 216 ( 0 3) */ ldd [%o0],%f0
+ .L900000752: /* frequency 36.6 confidence 0.0 */
+/* 0x02bc 216 ( 0 3) */ ldd [%g2],%f4
+/* 0x02c0 ( 0 3) */ fmuld %f0,%f20,%f2
+/* 0x02c4 ( 0 1) */ add %o1,1,%o1
+/* 0x02c8 ( 1 4) */ ldd [%l1],%f0
+/* 0x02cc ( 1 2) */ add %o0,8,%o0
+/* 0x02d0 ( 1 2) */ add %g2,8,%g2
+/* 0x02d4 ( 2 5) */ fmuld %f4,%f22,%f4
+/* 0x02d8 ( 2 3) */ cmp %o1,%g3
+/* 0x02dc ( 5 8) */ faddd %f2,%f4,%f2
+/* 0x02e0 ( 8 11) */ faddd %f0,%f2,%f0
+/* 0x02e4 ( 8 9) */ std %f0,[%l1]
+/* 0x02e8 ( 8 9) */ add %l1,16,%l1
+/* 0x02ec ( 8 9) */ ble,a,pt %icc,.L900000752 ! tprob=0.87
+/* 0x02f0 (10 13) */ ldd [%o0],%f0
+ .L77000213: /* frequency 5.3 confidence 0.0 */
+/* 0x02f4 ( 0 1) */ srl %i5,31,%g2
+ .L900000753: /* frequency 5.3 confidence 0.0 */
+/* 0x02f8 218 ( 0 1) */ cmp %l0,30
+/* 0x02fc ( 0 1) */ bne,a,pt %icc,.L900000751 ! tprob=0.54
+/* 0x0300 ( 0 3) */ fdtox %f18,%f0
+/* 0x0304 ( 1 2) */ add %i5,%g2,%g2
+/* 0x0308 ( 1 2) */ sub %o3,1,%o2
+/* 0x030c ( 2 3) */ sra %g2,1,%o0
+/* 0x0310 216 ( 2 5) */ ldd [%g4],%f0
+/* 0x0314 ( 3 4) */ add %o0,1,%g2
+/* 0x0318 ( 4 5) */ sll %g2,1,%o0
+/* 0x031c ( 4 5) */ fmovd %f0,%f2
+/* 0x0320 ( 5 6) */ sll %g2,4,%o1
+/* 0x0324 ( 5 6) */ cmp %o0,%o3
+/* 0x0328 ( 5 6) */ bge,pt %icc,.L77000215 ! tprob=0.53
+/* 0x032c ( 6 7) */ or %g0,0,%l0
+/* 0x0330 218 ( 6 7) */ add %g5,%o1,%o1
+/* 0x0334 216 ( 7 10) */ ldd [%o1],%f8
+ .L900000750: /* frequency 32.0 confidence 0.0 */
+/* 0x0338 ( 0 3) */ fdtox %f8,%f6
+/* 0x033c ( 0 3) */ ldd [%g4],%f10
+/* 0x0340 ( 0 1) */ add %o0,2,%o0
+/* 0x0344 ( 1 4) */ ldd [%o1+8],%f4
+/* 0x0348 ( 1 4) */ fdtox %f8,%f8
+/* 0x034c ( 1 2) */ cmp %o0,%o2
+/* 0x0350 ( 5 6) */ fmovs %f10,%f6
+/* 0x0354 ( 7 10) */ fxtod %f6,%f10
+/* 0x0358 ( 8 11) */ fdtox %f4,%f6
+/* 0x035c ( 9 12) */ fdtox %f4,%f4
+/* 0x0360 (10 13) */ faddd %f10,%f2,%f2
+/* 0x0364 (10 11) */ std %f2,[%o1]
+/* 0x0368 (12 15) */ ldd [%g4],%f2
+/* 0x036c (14 15) */ fmovs %f2,%f6
+/* 0x0370 (16 19) */ fxtod %f6,%f6
+/* 0x0374 (17 20) */ fitod %f8,%f2
+/* 0x0378 (19 22) */ faddd %f6,%f0,%f0
+/* 0x037c (19 20) */ std %f0,[%o1+8]
+/* 0x0380 (19 20) */ add %o1,16,%o1
+/* 0x0384 (20 23) */ fitod %f4,%f0
+/* 0x0388 (20 21) */ ble,a,pt %icc,.L900000750 ! tprob=0.87
+/* 0x038c (20 23) */ ldd [%o1],%f8
+ .L77000233: /* frequency 4.6 confidence 0.0 */
+/* 0x0390 ( 0 0) */ or %g0,0,%l0
+ .L77000215: /* frequency 5.3 confidence 0.0 */
+/* 0x0394 ( 0 3) */ fdtox %f18,%f0
+ .L900000751: /* frequency 5.3 confidence 0.0 */
+/* 0x0398 ( 0 3) */ ldd [%g4],%f6
+/* 0x039c 220 ( 0 1) */ add %i5,1,%i5
+/* 0x03a0 ( 0 1) */ add %i4,8,%i4
+/* 0x03a4 ( 1 4) */ ldd [%g4-8],%f2
+/* 0x03a8 ( 1 2) */ add %l0,1,%l0
+/* 0x03ac ( 1 2) */ add %i3,8,%i3
+/* 0x03b0 ( 2 3) */ fmovs %f6,%f0
+/* 0x03b4 ( 2 5) */ ldd [%g4-16],%f4
+/* 0x03b8 ( 2 3) */ cmp %i5,%o5
+/* 0x03bc ( 4 7) */ fxtod %f0,%f0
+/* 0x03c0 ( 7 10) */ fmuld %f0,%f16,%f0
+/* 0x03c4 (10 13) */ fmuld %f0,%f2,%f2
+/* 0x03c8 (13 16) */ fdtox %f2,%f2
+/* 0x03cc (16 19) */ fxtod %f2,%f2
+/* 0x03d0 (19 22) */ fmuld %f2,%f4,%f2
+/* 0x03d4 (22 25) */ fsubd %f0,%f2,%f22
+/* 0x03d8 (22 23) */ ble,a,pt %icc,.L900000749 ! tprob=0.89
+/* 0x03dc (22 25) */ ldd [%i2],%f0
+ .L900000725: /* frequency 0.7 confidence 0.0 */
+/* 0x03e0 220 ( 0 1) */ ba .L900000748 ! tprob=1.00
+/* 0x03e4 ( 0 1) */ sll %g1,4,%g2
+
+
+ .L77000289: /* frequency 0.8 confidence 0.0 */
+/* 0x03e8 225 ( 0 3) */ ldd [%o1],%f6
+/* 0x03ec 242 ( 0 1) */ add %g4,-8,%g2
+/* 0x03f0 ( 0 1) */ add %g4,-16,%g3
+/* 0x03f4 225 ( 1 4) */ ldd [%i1],%f2
+/* 0x03f8 245 ( 1 2) */ or %g0,0,%o3
+/* 0x03fc ( 1 2) */ or %g0,0,%o0
+/* 0x0400 225 ( 3 6) */ fmuld %f2,%f6,%f2
+/* 0x0404 ( 3 4) */ std %f2,[%o2]
+/* 0x0408 ( 4 7) */ ldd [%g4],%f6
+/* 0x040c 237 ( 7 8) */ std %f6,[%o2+8]
+/* 0x0410 ( 8 9) */ std %f6,[%o2+16]
+/* 0x0414 ( 9 10) */ std %f6,[%o2+24]
+/* 0x0418 (10 11) */ std %f6,[%o2+32]
+/* 0x041c (11 12) */ std %f6,[%o2+40]
+/* 0x0420 (12 13) */ std %f6,[%o2+48]
+/* 0x0424 (13 14) */ std %f6,[%o2+56]
+/* 0x0428 (14 15) */ std %f6,[%o2+64]
+/* 0x042c (15 16) */ std %f6,[%o2+72]
+! prefetch [%i4],0
+! prefetch [%i4+32],0
+! prefetch [%i4+64],0
+! prefetch [%i4+96],0
+! prefetch [%i4+120],0
+! prefetch [%i1],0
+! prefetch [%i1+32],0
+! prefetch [%i1+64],0
+! prefetch [%i1+96],0
+! prefetch [%i1+120],0
+/* 0x0430 (16 17) */ std %f6,[%o2+80]
+/* 0x0434 (17 18) */ std %f6,[%o2+88]
+/* 0x0438 (18 19) */ std %f6,[%o2+96]
+/* 0x043c (19 20) */ std %f6,[%o2+104]
+/* 0x0440 (20 21) */ std %f6,[%o2+112]
+/* 0x0444 (21 22) */ std %f6,[%o2+120]
+/* 0x0448 (22 23) */ std %f6,[%o2+128]
+/* 0x044c (23 24) */ std %f6,[%o2+136]
+/* 0x0450 (24 25) */ std %f6,[%o2+144]
+/* 0x0454 (25 26) */ std %f6,[%o2+152]
+/* 0x0458 (26 27) */ std %f6,[%o2+160]
+/* 0x045c (27 28) */ std %f6,[%o2+168]
+/* 0x0460 (27 30) */ fdtox %f2,%f2
+/* 0x0464 (28 29) */ std %f6,[%o2+176]
+/* 0x0468 (29 30) */ std %f6,[%o2+184]
+/* 0x046c (30 31) */ std %f6,[%o2+192]
+/* 0x0470 (31 32) */ std %f6,[%o2+200]
+/* 0x0474 (32 33) */ std %f6,[%o2+208]
+/* 0x0478 (33 34) */ std %f6,[%o2+216]
+/* 0x047c (34 35) */ std %f6,[%o2+224]
+/* 0x0480 (35 36) */ std %f6,[%o2+232]
+/* 0x0484 (36 37) */ std %f6,[%o2+240]
+/* 0x0488 (37 38) */ std %f6,[%o2+248]
+/* 0x048c (38 39) */ std %f6,[%o2+256]
+/* 0x0490 (39 40) */ std %f6,[%o2+264]
+/* 0x0494 (40 41) */ std %f6,[%o2+272]
+/* 0x0498 (41 42) */ std %f6,[%o2+280]
+/* 0x049c (42 43) */ std %f6,[%o2+288]
+/* 0x04a0 (43 44) */ std %f6,[%o2+296]
+/* 0x04a4 (44 45) */ std %f6,[%o2+304]
+/* 0x04a8 (45 46) */ std %f6,[%o2+312]
+/* 0x04ac (46 47) */ std %f6,[%o2+320]
+/* 0x04b0 (47 48) */ std %f6,[%o2+328]
+/* 0x04b4 (48 49) */ std %f6,[%o2+336]
+/* 0x04b8 (49 50) */ std %f6,[%o2+344]
+/* 0x04bc (50 51) */ std %f6,[%o2+352]
+/* 0x04c0 (51 52) */ std %f6,[%o2+360]
+/* 0x04c4 (52 53) */ std %f6,[%o2+368]
+/* 0x04c8 (53 54) */ std %f6,[%o2+376]
+/* 0x04cc (54 55) */ std %f6,[%o2+384]
+/* 0x04d0 (55 56) */ std %f6,[%o2+392]
+/* 0x04d4 (56 57) */ std %f6,[%o2+400]
+/* 0x04d8 (57 58) */ std %f6,[%o2+408]
+/* 0x04dc (58 59) */ std %f6,[%o2+416]
+/* 0x04e0 (59 60) */ std %f6,[%o2+424]
+/* 0x04e4 (60 61) */ std %f6,[%o2+432]
+/* 0x04e8 (61 62) */ std %f6,[%o2+440]
+/* 0x04ec (62 63) */ std %f6,[%o2+448]
+/* 0x04f0 (63 64) */ std %f6,[%o2+456]
+/* 0x04f4 (64 65) */ std %f6,[%o2+464]
+/* 0x04f8 (65 66) */ std %f6,[%o2+472]
+/* 0x04fc (66 67) */ std %f6,[%o2+480]
+/* 0x0500 (67 68) */ std %f6,[%o2+488]
+/* 0x0504 (68 69) */ std %f6,[%o2+496]
+/* 0x0508 (69 70) */ std %f6,[%o2+504]
+/* 0x050c (70 71) */ std %f6,[%o2+512]
+/* 0x0510 (71 72) */ std %f6,[%o2+520]
+/* 0x0514 242 (72 75) */ ld [%g4],%f2 ! dalign
+/* 0x0518 (73 76) */ ld [%g2],%f6 ! dalign
+/* 0x051c (74 77) */ fxtod %f2,%f10
+/* 0x0520 (74 77) */ ld [%g2+4],%f7
+/* 0x0524 (75 78) */ ld [%g3],%f8 ! dalign
+/* 0x0528 (76 79) */ ld [%g3+4],%f9
+/* 0x052c (77 80) */ fmuld %f10,%f0,%f0
+/* 0x0530 239 (77 80) */ ldd [%i4],%f4
+/* 0x0534 240 (78 81) */ ldd [%i1],%f2
+/* 0x0538 (80 83) */ fmuld %f0,%f6,%f6
+/* 0x053c (83 86) */ fdtox %f6,%f6
+/* 0x0540 (86 89) */ fxtod %f6,%f6
+/* 0x0544 (89 92) */ fmuld %f6,%f8,%f6
+/* 0x0548 (92 95) */ fsubd %f0,%f6,%f0
+/* 0x054c 250 (95 98) */ fmuld %f4,%f0,%f10
+ .L900000747: /* frequency 6.4 confidence 0.0 */
+
+
+ fmovd %f0,%f0
+ fmovd %f16,%f18
+ ldd [%i4],%f2
+ ldd [%o2],%f8
+ ldd [%i1],%f10
+ ldd [%g4-8],%f14
+ ldd [%g4-16],%f16
+ ldd [%o1],%f24
+
+ ldd [%i1+8],%f26
+ ldd [%i1+16],%f40
+ ldd [%i1+48],%f46
+ ldd [%i1+56],%f30
+ ldd [%i1+64],%f54
+ ldd [%i1+104],%f34
+ ldd [%i1+112],%f58
+
+ ldd [%i4+112],%f60
+ ldd [%i4+8],%f28
+ ldd [%i4+104],%f38
+
+ nop
+ nop
+!
+ .L99999999:
+!1
+!!!
+ ldd [%i1+24],%f32
+ fmuld %f0,%f2,%f4
+!2
+!!!
+ ldd [%i4+24],%f36
+ fmuld %f26,%f24,%f20
+!3
+!!!
+ ldd [%i1+40],%f42
+ fmuld %f28,%f0,%f22
+!4
+!!!
+ ldd [%i4+40],%f44
+ fmuld %f32,%f24,%f32
+!5
+!!!
+ ldd [%o1+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36
+!6
+!!!
+ add %o1,8,%o1
+ ldd [%i4+56],%f50
+ fmuld %f42,%f24,%f42
+!7
+!!!
+ ldd [%i1+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44
+!8
+!!!
+ ldd [%o2+16],%f22
+ fmuld %f10,%f6,%f12
+!9
+!!!
+ ldd [%i4+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4
+!10
+!!!
+ ldd [%o2+48],%f36
+ fmuld %f30,%f24,%f48
+!11
+!!!
+ ldd [%o2+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50
+!12
+!!!
+ std %f20,[%o2+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52
+!13
+!!!
+ ldd [%o2+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56
+!14
+!!!
+ ldd [%i1+88],%f20
+ faddd %f32,%f36,%f32
+!15
+!!!
+ ldd [%i4+88],%f22
+ faddd %f48,%f50,%f48
+!16
+!!!
+ ldd [%o2+112],%f50
+ faddd %f52,%f56,%f52
+!17
+!!!
+ ldd [%o2+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20
+!18
+!!!
+ std %f32,[%o2+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22
+!19
+!!!
+ std %f42,[%o2+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32
+!20
+!!!
+ std %f48,[%o2+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36
+!21
+!!!
+ ldd [%i1+120],%f42
+ fdtox %f8,%f4
+!22
+!!!
+ std %f52,[%o2+144]
+ faddd %f20,%f22,%f20
+!23
+!!!
+ ldd [%i4+120],%f44
+!24
+!!!
+ ldd [%o2+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42
+!25
+!!!
+ ldd [%i4+16],%f50
+ fmovs %f17,%f4
+!26
+!!!
+ ldd [%i1+32],%f52
+ fmuld %f44,%f0,%f44
+!27
+!!!
+ ldd [%i4+32],%f56
+ fmuld %f40,%f24,%f48
+!28
+!!!
+ ldd [%o2+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50
+!29
+!!!
+ std %f20,[%o2+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52
+!30
+!!!
+ ldd [%i4+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56
+!31
+!!!
+ ldd [%o2+240],%f44
+ faddd %f32,%f36,%f32
+!32
+!!!
+ std %f32,[%o2+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20
+!33
+!!!
+ ldd [%o2+32],%f50
+ fmuld %f4,%f18,%f12
+!34
+!!!
+ ldd [%i4+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22
+!35
+!!!
+ ldd [%o2+64],%f56
+ faddd %f42,%f44,%f42
+!36
+!!!
+ std %f42,[%o2+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32
+!37
+!!!
+ std %f48,[%o2+32]
+ fmuld %f12,%f14,%f4
+!38
+!!!
+ ldd [%i1+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36
+!39
+!!!
+ ldd [%i4+80],%f44
+ faddd %f20,%f22,%f20
+!40
+!!!
+ ldd [%i1+96],%f48
+ fmuld %f58,%f24,%f52
+!41
+!!!
+ ldd [%i4+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42
+!42
+!!!
+ std %f56,[%o2+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44
+!43
+!!!
+ ldd [%o2+96],%f22
+ fmuld %f48,%f24,%f48
+!44
+!!!
+ ldd [%o2+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50
+!45
+!!!
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56
+!46
+!!!
+ add %o2,8,%o2
+ faddd %f42,%f44,%f42
+!47
+!!!
+ ldd [%o2+160-8],%f44
+ faddd %f20,%f22,%f20
+!48
+!!!
+ std %f20,[%o2+96-8]
+ faddd %f48,%f50,%f48
+!49
+!!!
+ ldd [%o2+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4
+!50
+!!!
+ ldd [%o2+224-8],%f56
+ faddd %f32,%f36,%f32
+!51
+!!!
+ std %f32,[%o2+128-8]
+ faddd %f42,%f44,%f42
+!52
+ add %o3,1,%o3
+ std %f42,[%o2+160-8]
+ faddd %f48,%f50,%f48
+!53
+!!!
+ cmp %o3,31
+ std %f48,[%o2+192-8]
+ faddd %f52,%f56,%f52
+!54
+ std %f52,[%o2+224-8]
+ ble,pt %icc,.L99999999
+ fsubd %f12,%f4,%f0
+
+
+
+!55
+ std %f8,[%o2]
+
+
+
+
+
+
+ .L77000285: /* frequency 1.0 confidence 0.0 */
+/* 0x07a8 279 ( 0 1) */ sll %g1,4,%g2
+ .L900000748: /* frequency 1.0 confidence 0.0 */
+/* 0x07ac 279 ( 0 3) */ ldd [%g5+%g2],%f0
+/* 0x07b0 ( 0 1) */ add %g5,%g2,%i1
+/* 0x07b4 ( 0 1) */ or %g0,0,%o4
+/* 0x07b8 206 ( 1 4) */ ld [%fp+68],%o0
+/* 0x07bc 279 ( 1 2) */ or %g0,0,%i0
+/* 0x07c0 ( 1 2) */ cmp %g1,0
+/* 0x07c4 ( 2 5) */ fdtox %f0,%f0
+/* 0x07c8 ( 2 3) */ std %f0,[%sp+120]
+/* 0x07cc 275 ( 2 3) */ sethi %hi(0xfc00),%o1
+/* 0x07d0 206 ( 3 4) */ or %g0,%o0,%o3
+/* 0x07d4 275 ( 3 4) */ sub %g1,1,%g4
+/* 0x07d8 279 ( 4 7) */ ldd [%i1+8],%f0
+/* 0x07dc ( 4 5) */ or %g0,%o0,%g5
+/* 0x07e0 ( 4 5) */ add %o1,1023,%o1
+/* 0x07e4 ( 6 9) */ fdtox %f0,%f0
+/* 0x07e8 ( 6 7) */ std %f0,[%sp+112]
+/* 0x07ec (10 12) */ ldx [%sp+112],%o5
+/* 0x07f0 (11 13) */ ldx [%sp+120],%o7
+/* 0x07f4 (11 12) */ ble,pt %icc,.L900000746 ! tprob=0.56
+/* 0x07f8 (11 12) */ sethi %hi(0xfc00),%g2
+/* 0x07fc 275 (12 13) */ or %g0,-1,%g2
+/* 0x0800 279 (12 13) */ cmp %g1,3
+/* 0x0804 275 (13 14) */ srl %g2,0,%o2
+/* 0x0808 279 (13 14) */ bl,pn %icc,.L77000286 ! tprob=0.44
+/* 0x080c (13 14) */ or %g0,%i1,%g2
+/* 0x0810 (14 17) */ ldd [%i1+16],%f0
+/* 0x0814 (14 15) */ and %o5,%o1,%o0
+/* 0x0818 (14 15) */ add %i1,16,%g2
+/* 0x081c (15 16) */ sllx %o0,16,%g3
+/* 0x0820 (15 16) */ and %o7,%o2,%o0
+/* 0x0824 (16 19) */ fdtox %f0,%f0
+/* 0x0828 (16 17) */ std %f0,[%sp+104]
+/* 0x082c (16 17) */ add %o0,%g3,%o4
+/* 0x0830 (17 20) */ ldd [%i1+24],%f2
+/* 0x0834 (17 18) */ srax %o5,16,%o0
+/* 0x0838 (17 18) */ add %o3,4,%g5
+/* 0x083c (18 19) */ stx %o0,[%sp+128]
+/* 0x0840 (18 19) */ and %o4,%o2,%o0
+/* 0x0844 (18 19) */ or %g0,1,%i0
+/* 0x0848 (19 20) */ stx %o0,[%sp+112]
+/* 0x084c (19 20) */ srax %o4,32,%o0
+/* 0x0850 (19 22) */ fdtox %f2,%f0
+/* 0x0854 (20 21) */ stx %o0,[%sp+136]
+/* 0x0858 (20 21) */ srax %o7,32,%o4
+/* 0x085c (21 22) */ std %f0,[%sp+96]
+/* 0x0860 (22 24) */ ldx [%sp+136],%o7
+/* 0x0864 (23 25) */ ldx [%sp+128],%o0
+/* 0x0868 (25 27) */ ldx [%sp+104],%g3
+/* 0x086c (25 26) */ add %o0,%o7,%o0
+/* 0x0870 (26 28) */ ldx [%sp+112],%o7
+/* 0x0874 (26 27) */ add %o4,%o0,%o4
+/* 0x0878 (27 29) */ ldx [%sp+96],%o5
+/* 0x087c (28 29) */ st %o7,[%o3]
+/* 0x0880 (28 29) */ or %g0,%g3,%o7
+ .L900000730: /* frequency 64.0 confidence 0.0 */
+/* 0x0884 (17 19) */ ldd [%g2+16],%f0
+/* 0x0888 (17 18) */ add %i0,1,%i0
+/* 0x088c (17 18) */ add %g5,4,%g5
+/* 0x0890 (18 18) */ cmp %i0,%g4
+/* 0x0894 (18 19) */ add %g2,16,%g2
+/* 0x0898 (19 22) */ fdtox %f0,%f0
+/* 0x089c (20 21) */ std %f0,[%sp+104]
+/* 0x08a0 (21 23) */ ldd [%g2+8],%f0
+/* 0x08a4 (23 26) */ fdtox %f0,%f0
+/* 0x08a8 (24 25) */ std %f0,[%sp+96]
+/* 0x08ac (25 26) */ and %o5,%o1,%g3
+/* 0x08b0 (26 27) */ sllx %g3,16,%g3
+/* 0x08b4 ( 0 0) */ stx %g3,[%sp+120]
+/* 0x08b8 (26 27) */ and %o7,%o2,%g3
+/* 0x08bc ( 0 0) */ stx %o7,[%sp+128]
+/* 0x08c0 ( 0 0) */ ldx [%sp+120],%o7
+/* 0x08c4 (27 27) */ add %g3,%o7,%g3
+/* 0x08c8 ( 0 0) */ ldx [%sp+128],%o7
+/* 0x08cc (28 29) */ srax %o5,16,%o5
+/* 0x08d0 (28 28) */ add %g3,%o4,%g3
+/* 0x08d4 (29 30) */ srax %g3,32,%o4
+/* 0x08d8 ( 0 0) */ stx %o4,[%sp+112]
+/* 0x08dc (30 31) */ srax %o7,32,%o4
+/* 0x08e0 ( 0 0) */ ldx [%sp+112],%o7
+/* 0x08e4 (30 31) */ add %o5,%o7,%o7
+/* 0x08e8 (31 33) */ ldx [%sp+96],%o5
+/* 0x08ec (31 32) */ add %o4,%o7,%o4
+/* 0x08f0 (32 33) */ and %g3,%o2,%g3
+/* 0x08f4 ( 0 0) */ ldx [%sp+104],%o7
+/* 0x08f8 (33 34) */ ble,pt %icc,.L900000730 ! tprob=0.50
+/* 0x08fc (33 34) */ st %g3,[%g5-4]
+ .L900000733: /* frequency 8.0 confidence 0.0 */
+/* 0x0900 ( 0 1) */ ba .L900000746 ! tprob=1.00
+/* 0x0904 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L77000286: /* frequency 0.7 confidence 0.0 */
+/* 0x0908 ( 0 3) */ ldd [%g2+16],%f0
+ .L900000745: /* frequency 6.4 confidence 0.0 */
+/* 0x090c ( 0 1) */ and %o7,%o2,%o0
+/* 0x0910 ( 0 1) */ and %o5,%o1,%g3
+/* 0x0914 ( 0 3) */ fdtox %f0,%f0
+/* 0x0918 ( 1 2) */ add %o4,%o0,%o0
+/* 0x091c ( 1 2) */ std %f0,[%sp+104]
+/* 0x0920 ( 1 2) */ add %i0,1,%i0
+/* 0x0924 ( 2 3) */ sllx %g3,16,%o4
+/* 0x0928 ( 2 5) */ ldd [%g2+24],%f2
+/* 0x092c ( 2 3) */ add %g2,16,%g2
+/* 0x0930 ( 3 4) */ add %o0,%o4,%o4
+/* 0x0934 ( 3 4) */ cmp %i0,%g4
+/* 0x0938 ( 4 5) */ srax %o5,16,%o0
+/* 0x093c ( 4 5) */ stx %o0,[%sp+112]
+/* 0x0940 ( 4 5) */ and %o4,%o2,%g3
+/* 0x0944 ( 5 6) */ srax %o4,32,%o5
+/* 0x0948 ( 5 8) */ fdtox %f2,%f0
+/* 0x094c ( 5 6) */ std %f0,[%sp+96]
+/* 0x0950 ( 6 7) */ srax %o7,32,%o4
+/* 0x0954 ( 6 8) */ ldx [%sp+112],%o7
+/* 0x0958 ( 8 9) */ add %o7,%o5,%o7
+/* 0x095c ( 9 11) */ ldx [%sp+104],%o5
+/* 0x0960 ( 9 10) */ add %o4,%o7,%o4
+/* 0x0964 (10 12) */ ldx [%sp+96],%o0
+/* 0x0968 (11 12) */ st %g3,[%g5]
+/* 0x096c (11 12) */ or %g0,%o5,%o7
+/* 0x0970 (11 12) */ add %g5,4,%g5
+/* 0x0974 (12 13) */ or %g0,%o0,%o5
+/* 0x0978 (12 13) */ ble,a,pt %icc,.L900000745 ! tprob=0.86
+/* 0x097c (12 15) */ ldd [%g2+16],%f0
+ .L77000236: /* frequency 1.0 confidence 0.0 */
+/* 0x0980 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L900000746: /* frequency 1.0 confidence 0.0 */
+/* 0x0984 ( 0 1) */ or %g0,-1,%o0
+/* 0x0988 ( 0 1) */ add %g2,1023,%g2
+/* 0x098c ( 0 3) */ ld [%fp+88],%o1
+/* 0x0990 ( 1 2) */ srl %o0,0,%g3
+/* 0x0994 ( 1 2) */ and %o5,%g2,%g2
+/* 0x0998 ( 2 3) */ and %o7,%g3,%g4
+/* 0x099c 281 ( 2 3) */ or %g0,-1,%o5
+/* 0x09a0 275 ( 3 4) */ sllx %g2,16,%g2
+/* 0x09a4 ( 3 4) */ add %o4,%g4,%g4
+/* 0x09a8 ( 4 5) */ add %g4,%g2,%g2
+/* 0x09ac ( 5 6) */ sll %i0,2,%g4
+/* 0x09b0 ( 5 6) */ and %g2,%g3,%g2
+/* 0x09b4 ( 6 7) */ st %g2,[%o3+%g4]
+/* 0x09b8 281 ( 6 7) */ sll %g1,2,%g2
+/* 0x09bc ( 7 10) */ ld [%o3+%g2],%g2
+/* 0x09c0 ( 9 10) */ cmp %g2,0
+/* 0x09c4 ( 9 10) */ bleu,pn %icc,.L77000241 ! tprob=0.50
+/* 0x09c8 ( 9 10) */ or %g0,%o1,%o2
+/* 0x09cc (10 11) */ ba .L900000744 ! tprob=1.00
+/* 0x09d0 (10 11) */ cmp %o5,0
+ .L77000241: /* frequency 0.8 confidence 0.0 */
+/* 0x09d4 ( 0 1) */ subcc %g1,1,%o5
+/* 0x09d8 ( 0 1) */ bneg,pt %icc,.L900000744 ! tprob=0.60
+/* 0x09dc ( 1 2) */ cmp %o5,0
+/* 0x09e0 ( 1 2) */ sll %o5,2,%g2
+/* 0x09e4 ( 2 3) */ add %o1,%g2,%o0
+/* 0x09e8 ( 2 3) */ add %o3,%g2,%o4
+/* 0x09ec ( 3 6) */ ld [%o0],%g2
+ .L900000743: /* frequency 5.3 confidence 0.0 */
+/* 0x09f0 ( 0 3) */ ld [%o4],%g3
+/* 0x09f4 ( 0 1) */ add %o0,4,%o0
+/* 0x09f8 ( 0 1) */ add %o4,4,%o4
+/* 0x09fc ( 2 3) */ cmp %g3,%g2
+/* 0x0a00 ( 2 3) */ bne,pn %icc,.L77000244 ! tprob=0.16
+/* 0x0a04 ( 2 3) */ nop
+/* 0x0a08 ( 3 4) */ addcc %o5,1,%o5
+/* 0x0a0c ( 3 4) */ bpos,a,pt %icc,.L900000743 ! tprob=0.84
+/* 0x0a10 ( 3 6) */ ld [%o0],%g2
+ .L77000244: /* frequency 1.0 confidence 0.0 */
+/* 0x0a14 ( 0 1) */ cmp %o5,0
+ .L900000744: /* frequency 1.0 confidence 0.0 */
+/* 0x0a18 ( 0 1) */ bl,pn %icc,.L77000287 ! tprob=0.50
+/* 0x0a1c ( 0 1) */ sll %o5,2,%g2
+/* 0x0a20 ( 1 4) */ ld [%o2+%g2],%g3
+/* 0x0a24 ( 2 5) */ ld [%o3+%g2],%g2
+/* 0x0a28 ( 4 5) */ cmp %g2,%g3
+/* 0x0a2c ( 4 5) */ bleu,pt %icc,.L77000224 ! tprob=0.56
+/* 0x0a30 ( 4 5) */ nop
+ .L77000287: /* frequency 0.8 confidence 0.0 */
+/* 0x0a34 ( 0 1) */ cmp %g1,0
+/* 0x0a38 ( 0 1) */ ble,pt %icc,.L77000224 ! tprob=0.60
+/* 0x0a3c ( 0 1) */ nop
+/* 0x0a40 281 ( 1 2) */ sub %g1,1,%o7
+/* 0x0a44 ( 1 2) */ or %g0,-1,%g2
+/* 0x0a48 ( 2 3) */ srl %g2,0,%o4
+/* 0x0a4c ( 2 3) */ add %o7,1,%o0
+/* 0x0a50 279 ( 3 4) */ or %g0,0,%o5
+/* 0x0a54 ( 3 4) */ or %g0,0,%g1
+/* 0x0a58 ( 4 5) */ cmp %o0,3
+/* 0x0a5c ( 4 5) */ bl,pn %icc,.L77000288 ! tprob=0.40
+/* 0x0a60 ( 4 5) */ add %o3,8,%o1
+/* 0x0a64 ( 5 6) */ add %o2,4,%o0
+/* 0x0a68 ( 5 8) */ ld [%o1-8],%g2
+/* 0x0a6c 0 ( 5 6) */ or %g0,%o1,%o3
+/* 0x0a70 279 ( 6 9) */ ld [%o0-4],%g3
+/* 0x0a74 0 ( 6 7) */ or %g0,%o0,%o2
+/* 0x0a78 279 ( 6 7) */ or %g0,2,%g1
+/* 0x0a7c ( 7 10) */ ld [%o3-4],%o0
+/* 0x0a80 ( 8 9) */ sub %g2,%g3,%g2
+/* 0x0a84 ( 9 10) */ or %g0,%g2,%o5
+/* 0x0a88 ( 9 10) */ and %g2,%o4,%g2
+/* 0x0a8c ( 9 10) */ st %g2,[%o3-8]
+/* 0x0a90 (10 11) */ srax %o5,32,%o5
+ .L900000734: /* frequency 64.0 confidence 0.0 */
+/* 0x0a94 (12 20) */ ld [%o2],%g2
+/* 0x0a98 (12 13) */ add %g1,1,%g1
+/* 0x0a9c (12 13) */ add %o2,4,%o2
+/* 0x0aa0 (13 13) */ cmp %g1,%o7
+/* 0x0aa4 (13 14) */ add %o3,4,%o3
+/* 0x0aa8 (14 14) */ sub %o0,%g2,%o0
+/* 0x0aac (15 15) */ add %o0,%o5,%o5
+/* 0x0ab0 (16 17) */ and %o5,%o4,%g2
+/* 0x0ab4 (16 24) */ ld [%o3-4],%o0
+/* 0x0ab8 (17 18) */ st %g2,[%o3-8]
+/* 0x0abc (17 18) */ ble,pt %icc,.L900000734 ! tprob=0.50
+/* 0x0ac0 (17 18) */ srax %o5,32,%o5
+ .L900000737: /* frequency 8.0 confidence 0.0 */
+/* 0x0ac4 ( 0 3) */ ld [%o2],%o1
+/* 0x0ac8 ( 2 3) */ sub %o0,%o1,%o0
+/* 0x0acc ( 3 4) */ add %o0,%o5,%o0
+/* 0x0ad0 ( 4 5) */ and %o0,%o4,%o1
+/* 0x0ad4 ( 4 5) */ st %o1,[%o3-4]
+/* 0x0ad8 ( 5 7) */ ret ! Result =
+/* 0x0adc ( 7 8) */ restore %g0,%g0,%g0
+ .L77000288: /* frequency 0.6 confidence 0.0 */
+/* 0x0ae0 ( 0 3) */ ld [%o3],%o0
+ .L900000742: /* frequency 5.3 confidence 0.0 */
+/* 0x0ae4 ( 0 3) */ ld [%o2],%o1
+/* 0x0ae8 ( 0 1) */ add %o5,%o0,%o0
+/* 0x0aec ( 0 1) */ add %g1,1,%g1
+/* 0x0af0 ( 1 2) */ add %o2,4,%o2
+/* 0x0af4 ( 1 2) */ cmp %g1,%o7
+/* 0x0af8 ( 2 3) */ sub %o0,%o1,%o0
+/* 0x0afc ( 3 4) */ and %o0,%o4,%o1
+/* 0x0b00 ( 3 4) */ st %o1,[%o3]
+/* 0x0b04 ( 3 4) */ add %o3,4,%o3
+/* 0x0b08 ( 4 5) */ srax %o0,32,%o5
+/* 0x0b0c ( 4 5) */ ble,a,pt %icc,.L900000742 ! tprob=0.84
+/* 0x0b10 ( 4 7) */ ld [%o3],%o0
+ .L77000224: /* frequency 1.0 confidence 0.0 */
+/* 0x0b14 ( 0 2) */ ret ! Result =
+/* 0x0b18 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x0b1c 0 ( 0 0) */ .type mont_mulf_noconv,2
+/* 0x0b1c ( 0 0) */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
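+!
+! The routine above follows the contract stated in the montmulf.c
+! comments: result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2],
+! dn[nlen] and nint[nlen] must all be distinct buffers of at least
+! those lengths. A minimal caller sketch (hypothetical names; the
+! exact definition of dn0, assumed here to be the per-digit
+! Montgomery constant used by the digit=mod(...) recurrence, is not
+! given in this file):
+!
+!   #define NLEN 16
+!   unsigned int result[NLEN + 1], n[NLEN];
+!   double dm1[NLEN], dm2[2 * NLEN + 1], dt[4 * NLEN + 2], dn[NLEN];
+!   double dn0;  /* Montgomery factor for the 2^16 radix; assumption */
+!   /* fill dm1/dm2/dn via conv_i32_to_d32_and_d16, n with the modulus */
+!   mont_mulf_noconv(result, dm1, dm2, dt, dn, n, NLEN, dn0);
+!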
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.il b/security/nss/lib/freebl/mpi/montmulfv8.il
new file mode 100644
index 0000000000..4952d0fb82
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+!
+ .inline upper32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+
+ fdtox %f10,%f10
+ fitod %f10,%f0
+ .end
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+!
+ .inline lower32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f12
+
+ fdtox %f10,%f10
+ fmovs %f12,%f10
+ fxtod %f10,%f0
+ .end
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+ .inline mod,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f2
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f6
+
+ fmuld %f2,%f4,%f4
+ fdtox %f4,%f4
+ fxtod %f4,%f4
+ fmuld %f4,%f6,%f4
+ fsubd %f2,%f4,%f0
+ .end
+
+
+!
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */
+! float * /*source - should be unsigned int*
+! converted to float* */);
+!
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22
+
+ fmovd %f22,%f6
+ ld [%o5],%f7
+ fmovd %f22,%f10
+ ld [%o5+4],%f11
+ fmovd %f22,%f14
+ ld [%o5+8],%f15
+ fmovd %f22,%f18
+ ld [%o5+12],%f19
+ fxtod %f6,%f6
+ std %f6,[%o4]
+ fxtod %f10,%f10
+ std %f10,[%o4+8]
+ fxtod %f14,%f14
+ std %f14,[%o4+16]
+ fxtod %f18,%f18
+ std %f18,[%o4+24]
+ fmuld %f2,%f6,%f8
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8
+ std %f8,[%o3+8]
+ fxtod %f12,%f12
+ std %f12,[%o3+24]
+ fxtod %f16,%f16
+ std %f16,[%o3+40]
+ fxtod %f20,%f20
+ std %f20,[%o3+56]
+ fmuld %f8,%f4,%f8
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8
+ std %f8,[%o3]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48]
+ .end
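+!
+! C-level semantics of the four templates above, for reference. The
+! first three match the portable montmulf.c definitions (TwoTo32 and
+! TwoToMinus32 are its 2^32 constants, floor is from <math.h>); the
+! loop form of i16_to_d16_and_d32x4 is a sketch of what the unrolled
+! body computes, not code that ships with this file:
+!
+!   double upper32(double x)           { return floor(x * TwoToMinus32); }
+!   double lower32(double x, double z) { return x - TwoTo32 * upper32(x); }
+!   double mod(double x, double oneoverm, double m)
+!                                      { return x - m * floor(x * oneoverm); }
+!
+!   void i16_to_d16_and_d32x4(const double *inv16, const double *two16,
+!                             const double *zero, double *result16,
+!                             double *result32, float *src)
+!   {
+!       unsigned int *u = (unsigned int *)src;  /* four 32-bit words */
+!       (void)zero;  /* the assembly uses *zero to seed high words */
+!       for (int i = 0; i < 4; i++) {
+!           double x = (double)u[i];
+!           result32[i]       = x;
+!           result16[2*i + 1] = floor(x * *inv16);            /* high 16 */
+!           result16[2*i]     = x - *two16 * result16[2*i+1]; /* low 16  */
+!       }
+!   }
+!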
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.s b/security/nss/lib/freebl/mpi/montmulfv8.s
new file mode 100644
index 0000000000..ca738880fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.s
@@ -0,0 +1,1818 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".rodata",#alloc
+ .global TwoTo16
+ .align 8
+!
+! CONSTANT POOL
+!
+ .global TwoTo16
+TwoTo16:
+ .word 1089470464
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+ .global TwoToMinus16
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus16
+TwoToMinus16:
+ .word 1055916032
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+ .global Zero
+!
+! CONSTANT POOL
+!
+ .global Zero
+Zero:
+ .word 0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+ .global TwoTo32
+!
+! CONSTANT POOL
+!
+ .global TwoTo32
+TwoTo32:
+ .word 1106247680
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+ .global TwoToMinus32
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus32
+TwoToMinus32:
+ .word 1039138816
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+!
+! SUBROUTINE conv_d16_to_i32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32:
+/* 000000 */ save %sp,-128,%sp
+! FILE montmulf.c
+
+! 36 !#define RF_INLINE_MACROS
+! 38 !static const double TwoTo16=65536.0;
+! 39 !static const double TwoToMinus16=1.0/65536.0;
+! 40 !static const double Zero=0.0;
+! 41 !static const double TwoTo32=65536.0*65536.0;
+! 42 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+! 44 !#ifdef RF_INLINE_MACROS
+! 46 !double upper32(double);
+! 47 !double lower32(double, double);
+! 48 !double mod(double, double, double);
+! 50 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+! 51 ! const double * /* 2^16*/,
+! 52 ! const double * /* 0 */,
+! 53 ! double * /*result16*/,
+! 54 ! double * /* result32 */,
+! 55 ! float * /*source - should be unsigned int*
+! 56 ! converted to float* */);
+! 58 !#else
+! 60 !static double upper32(double x)
+! 61 !{
+! 62 ! return floor(x*TwoToMinus32);
+! 63 !}
+! 65 !static double lower32(double x, double y)
+! 66 !{
+! 67 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 68 !}
+! 70 !static double mod(double x, double oneoverm, double m)
+! 71 !{
+! 72 ! return x-m*floor(x*oneoverm);
+! 73 !}
+! 75 !#endif
+! 78 !static void cleanup(double *dt, int from, int tlen)
+! 79 !{
+! 80 ! int i;
+! 81 ! double tmp,tmp1,x,x1;
+! 83 ! tmp=tmp1=Zero;
+! 84 ! /* original code **
+! 85 ! for(i=2*from;i<2*tlen-2;i++)
+! 86 ! {
+! 87 ! x=dt[i];
+! 88 ! dt[i]=lower32(x,Zero)+tmp1;
+! 89 ! tmp1=tmp;
+! 90 ! tmp=upper32(x);
+! 91 ! }
+! 92 ! dt[tlen-2]+=tmp1;
+! 93 ! dt[tlen-1]+=tmp;
+! 94 ! **end original code ***/
+! 95 ! /* new code ***/
+! 96 ! for(i=2*from;i<2*tlen;i+=2)
+! 97 ! {
+! 98 ! x=dt[i];
+! 99 ! x1=dt[i+1];
+! 100 ! dt[i]=lower32(x,Zero)+tmp;
+! 101 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 102 ! tmp=upper32(x);
+! 103 ! tmp1=upper32(x1);
+! 104 ! }
+! 105 ! /** end new code **/
+! 106 !}
+! 109 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 110 !{
+! 111 !int i;
+! 112 !long long t, t1, a, b, c, d;
+! 114 ! t1=0;
+! 115 ! a=(long long)d16[0];
+
+/* 0x0004 115 */ ldd [%i1],%f0
+/* 0x0008 110 */ or %g0,%i1,%o0
+
+! 116 ! b=(long long)d16[1];
+! 117 ! for(i=0; i<ilen-1; i++)
+
+/* 0x000c 117 */ sub %i3,1,%g2
+/* 0x0010 */ cmp %g2,0
+/* 0x0014 114 */ or %g0,0,%o4
+/* 0x0018 115 */ fdtox %f0,%f0
+/* 0x001c */ std %f0,[%sp+120]
+/* 0x0020 117 */ or %g0,0,%o7
+/* 0x0024 110 */ or %g0,%i3,%o1
+/* 0x0028 */ sub %i3,2,%o2
+/* 0x002c 116 */ ldd [%o0+8],%f0
+/* 0x0030 110 */ sethi %hi(0xfc00),%o1
+/* 0x0034 */ add %o2,1,%g3
+/* 0x0038 */ add %o1,1023,%o1
+/* 0x003c */ or %g0,%i0,%o5
+/* 0x0040 116 */ fdtox %f0,%f0
+/* 0x0044 */ std %f0,[%sp+112]
+/* 0x0048 */ ldx [%sp+112],%g1
+/* 0x004c 115 */ ldx [%sp+120],%g4
+/* 0x0050 117 */ ble,pt %icc,.L900000117
+/* 0x0054 */ sethi %hi(0xfc00),%g2
+/* 0x0058 110 */ or %g0,-1,%g2
+/* 0x005c 117 */ cmp %g3,3
+/* 0x0060 110 */ srl %g2,0,%o3
+/* 0x0064 117 */ bl,pn %icc,.L77000134
+/* 0x0068 */ or %g0,%o0,%g2
+
+! 118 ! {
+! 119 ! c=(long long)d16[2*i+2];
+
+/* 0x006c 119 */ ldd [%o0+16],%f0
+
+! 120 ! t1+=a&0xffffffff;
+! 121 ! t=(a>>32);
+! 122 ! d=(long long)d16[2*i+3];
+! 123 ! t1+=(b&0xffff)<<16;
+! 124 ! t+=(b>>16)+(t1>>32);
+! 125 ! i32[i]=t1&0xffffffff;
+! 126 ! t1=t;
+! 127 ! a=c;
+! 128 ! b=d;
+
+/* 0x0070 128 */ add %o0,16,%g2
+/* 0x0074 123 */ and %g1,%o1,%o0
+/* 0x0078 */ sllx %o0,16,%g3
+/* 0x007c 120 */ and %g4,%o3,%o0
+/* 0x0080 117 */ add %o0,%g3,%o4
+/* 0x0084 119 */ fdtox %f0,%f0
+/* 0x0088 */ std %f0,[%sp+104]
+/* 0x008c 125 */ and %o4,%o3,%g5
+/* 0x0090 122 */ ldd [%g2+8],%f2
+/* 0x0094 128 */ add %o5,4,%o5
+/* 0x0098 124 */ srax %o4,32,%o4
+/* 0x009c */ stx %o4,[%sp+112]
+/* 0x00a0 122 */ fdtox %f2,%f0
+/* 0x00a4 */ std %f0,[%sp+96]
+/* 0x00a8 124 */ srax %g1,16,%o0
+/* 0x00ac */ ldx [%sp+112],%o7
+/* 0x00b0 121 */ srax %g4,32,%o4
+/* 0x00b4 124 */ add %o0,%o7,%g4
+/* 0x00b8 128 */ or %g0,1,%o7
+/* 0x00bc 119 */ ldx [%sp+104],%g3
+/* 0x00c0 124 */ add %o4,%g4,%o4
+/* 0x00c4 122 */ ldx [%sp+96],%g1
+/* 0x00c8 125 */ st %g5,[%o5-4]
+/* 0x00cc 127 */ or %g0,%g3,%g4
+ .L900000112:
+/* 0x00d0 119 */ ldd [%g2+16],%f0
+/* 0x00d4 128 */ add %o7,1,%o7
+/* 0x00d8 */ add %o5,4,%o5
+/* 0x00dc */ cmp %o7,%o2
+/* 0x00e0 */ add %g2,16,%g2
+/* 0x00e4 119 */ fdtox %f0,%f0
+/* 0x00e8 */ std %f0,[%sp+104]
+/* 0x00ec 122 */ ldd [%g2+8],%f0
+/* 0x00f0 */ fdtox %f0,%f0
+/* 0x00f4 */ std %f0,[%sp+96]
+/* 0x00f8 123 */ and %g1,%o1,%g3
+/* 0x00fc */ sllx %g3,16,%g5
+/* 0x0100 120 */ and %g4,%o3,%g3
+/* 0x0104 117 */ add %g3,%g5,%g3
+/* 0x0108 124 */ srax %g1,16,%g1
+/* 0x010c 117 */ add %g3,%o4,%g3
+/* 0x0110 124 */ srax %g3,32,%o4
+/* 0x0114 */ stx %o4,[%sp+112]
+/* 0x0118 119 */ ldx [%sp+104],%g5
+/* 0x011c 121 */ srax %g4,32,%o4
+/* 0x0120 124 */ ldx [%sp+112],%g4
+/* 0x0124 */ add %g1,%g4,%g4
+/* 0x0128 122 */ ldx [%sp+96],%g1
+/* 0x012c 124 */ add %o4,%g4,%o4
+/* 0x0130 125 */ and %g3,%o3,%g3
+/* 0x0134 127 */ or %g0,%g5,%g4
+/* 0x0138 128 */ ble,pt %icc,.L900000112
+/* 0x013c */ st %g3,[%o5-4]
+ .L900000115:
+/* 0x0140 128 */ ba .L900000117
+/* 0x0144 */ sethi %hi(0xfc00),%g2
+ .L77000134:
+/* 0x0148 119 */ ldd [%g2+16],%f0
+ .L900000116:
+/* 0x014c 120 */ and %g4,%o3,%o0
+/* 0x0150 123 */ and %g1,%o1,%g3
+/* 0x0154 119 */ fdtox %f0,%f0
+/* 0x0158 120 */ add %o4,%o0,%o0
+/* 0x015c 119 */ std %f0,[%sp+104]
+/* 0x0160 128 */ add %o7,1,%o7
+/* 0x0164 123 */ sllx %g3,16,%o4
+/* 0x0168 122 */ ldd [%g2+24],%f2
+/* 0x016c 128 */ add %g2,16,%g2
+/* 0x0170 123 */ add %o0,%o4,%o0
+/* 0x0174 128 */ cmp %o7,%o2
+/* 0x0178 125 */ and %o0,%o3,%g3
+/* 0x017c 122 */ fdtox %f2,%f0
+/* 0x0180 */ std %f0,[%sp+96]
+/* 0x0184 124 */ srax %o0,32,%o0
+/* 0x0188 */ stx %o0,[%sp+112]
+/* 0x018c 121 */ srax %g4,32,%o4
+/* 0x0190 122 */ ldx [%sp+96],%o0
+/* 0x0194 124 */ srax %g1,16,%g5
+/* 0x0198 */ ldx [%sp+112],%g4
+/* 0x019c 119 */ ldx [%sp+104],%g1
+/* 0x01a0 125 */ st %g3,[%o5]
+/* 0x01a4 124 */ add %g5,%g4,%g4
+/* 0x01a8 128 */ add %o5,4,%o5
+/* 0x01ac 124 */ add %o4,%g4,%o4
+/* 0x01b0 127 */ or %g0,%g1,%g4
+/* 0x01b4 128 */ or %g0,%o0,%g1
+/* 0x01b8 */ ble,a,pt %icc,.L900000116
+/* 0x01bc */ ldd [%g2+16],%f0
+ .L77000127:
+
+! 129 ! }
+! 130 ! t1+=a&0xffffffff;
+! 131 ! t=(a>>32);
+! 132 ! t1+=(b&0xffff)<<16;
+! 133 ! i32[i]=t1&0xffffffff;
+
+/* 0x01c0 133 */ sethi %hi(0xfc00),%g2
+ .L900000117:
+/* 0x01c4 133 */ or %g0,-1,%g3
+/* 0x01c8 */ add %g2,1023,%g2
+/* 0x01cc */ srl %g3,0,%g3
+/* 0x01d0 */ and %g1,%g2,%g2
+/* 0x01d4 */ and %g4,%g3,%g4
+/* 0x01d8 */ sllx %g2,16,%g2
+/* 0x01dc */ add %o4,%g4,%g4
+/* 0x01e0 */ add %g4,%g2,%g2
+/* 0x01e4 */ sll %o7,2,%g4
+/* 0x01e8 */ and %g2,%g3,%g2
+/* 0x01ec */ st %g2,[%i0+%g4]
+/* 0x01f0 */ ret ! Result =
+/* 0x01f4 */ restore %g0,%g0,%g0
+/* 0x01f8 0 */ .type conv_d16_to_i32,2
+/* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
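+!
+! One step of the recombination implemented above, in the terms of the
+! quoted source: each 32-bit output word is assembled from the low half
+! of digit a and the low 16 bits of digit b, and everything higher is
+! carried into the next step (worked example in the comment):
+!
+!   t1 += a & 0xffffffff;        /* a may carry bits above 2^16 */
+!   t   = a >> 32;
+!   t1 += (b & 0xffff) << 16;
+!   t  += (b >> 16) + (t1 >> 32);
+!   i32[i] = t1 & 0xffffffff;    /* a=0x100005678, b=0x1234 gives   */
+!   t1 = t;                      /* i32[i]=0x12345678 and carry t=1 */
+!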
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000201:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 4
+/* 0x0008 */ .skip 16
+!
+! SUBROUTINE conv_i32_to_d32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32:
+/* 000000 */ or %g0,%o7,%g2
+
+! 135 !}
+! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 138 !{
+! 139 !int i;
+! 141 !#pragma pipeloop(0)
+! 142 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 142 */ cmp %o2,0
+ .L900000210:
+/* 0x0008 */ call .+8
+/* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010 142 */ or %g0,0,%o5
+/* 0x0014 138 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018 */ or %g0,%o0,%g5
+/* 0x001c */ add %g4,%o7,%g1
+/* 0x0020 142 */ ble,pt %icc,.L77000140
+/* 0x0024 */ or %g0,%g2,%o7
+/* 0x0028 */ sethi %hi(.L_const_seg_900000201),%g2
+/* 0x002c 138 */ or %g0,%o1,%g4
+/* 0x0030 142 */ add %g2,%lo(.L_const_seg_900000201),%g2
+/* 0x0034 */ sub %o2,1,%g3
+/* 0x0038 */ ld [%g1+%g2],%g2
+/* 0x003c */ cmp %o2,9
+/* 0x0040 */ bl,pn %icc,.L77000144
+/* 0x0044 */ ldd [%g2],%f8
+/* 0x0048 */ add %o1,16,%g4
+/* 0x004c */ sub %o2,5,%g1
+/* 0x0050 */ ld [%o1],%f7
+/* 0x0054 */ or %g0,4,%o5
+/* 0x0058 */ ld [%o1+4],%f5
+/* 0x005c */ ld [%o1+8],%f3
+/* 0x0060 */ fmovs %f8,%f6
+/* 0x0064 */ ld [%o1+12],%f1
+ .L900000205:
+/* 0x0068 */ ld [%g4],%f11
+/* 0x006c */ add %o5,5,%o5
+/* 0x0070 */ add %g4,20,%g4
+/* 0x0074 */ fsubd %f6,%f8,%f6
+/* 0x0078 */ std %f6,[%g5]
+/* 0x007c */ cmp %o5,%g1
+/* 0x0080 */ add %g5,40,%g5
+/* 0x0084 */ fmovs %f8,%f4
+/* 0x0088 */ ld [%g4-16],%f7
+/* 0x008c */ fsubd %f4,%f8,%f12
+/* 0x0090 */ fmovs %f8,%f2
+/* 0x0094 */ std %f12,[%g5-32]
+/* 0x0098 */ ld [%g4-12],%f5
+/* 0x009c */ fsubd %f2,%f8,%f12
+/* 0x00a0 */ fmovs %f8,%f0
+/* 0x00a4 */ std %f12,[%g5-24]
+/* 0x00a8 */ ld [%g4-8],%f3
+/* 0x00ac */ fsubd %f0,%f8,%f12
+/* 0x00b0 */ fmovs %f8,%f10
+/* 0x00b4 */ std %f12,[%g5-16]
+/* 0x00b8 */ ld [%g4-4],%f1
+/* 0x00bc */ fsubd %f10,%f8,%f10
+/* 0x00c0 */ fmovs %f8,%f6
+/* 0x00c4 */ ble,pt %icc,.L900000205
+/* 0x00c8 */ std %f10,[%g5-8]
+ .L900000208:
+/* 0x00cc */ fmovs %f8,%f4
+/* 0x00d0 */ add %g5,32,%g5
+/* 0x00d4 */ cmp %o5,%g3
+/* 0x00d8 */ fmovs %f8,%f2
+/* 0x00dc */ fmovs %f8,%f0
+/* 0x00e0 */ fsubd %f6,%f8,%f6
+/* 0x00e4 */ std %f6,[%g5-32]
+/* 0x00e8 */ fsubd %f4,%f8,%f4
+/* 0x00ec */ std %f4,[%g5-24]
+/* 0x00f0 */ fsubd %f2,%f8,%f2
+/* 0x00f4 */ std %f2,[%g5-16]
+/* 0x00f8 */ fsubd %f0,%f8,%f0
+/* 0x00fc */ bg,pn %icc,.L77000140
+/* 0x0100 */ std %f0,[%g5-8]
+ .L77000144:
+/* 0x0104 */ ld [%g4],%f1
+ .L900000211:
+/* 0x0108 */ ldd [%g2],%f8
+/* 0x010c */ add %o5,1,%o5
+/* 0x0110 */ add %g4,4,%g4
+/* 0x0114 */ cmp %o5,%g3
+/* 0x0118 */ fmovs %f8,%f0
+/* 0x011c */ fsubd %f0,%f8,%f0
+/* 0x0120 */ std %f0,[%g5]
+/* 0x0124 */ add %g5,8,%g5
+/* 0x0128 */ ble,a,pt %icc,.L900000211
+/* 0x012c */ ld [%g4],%f1
+ .L77000140:
+/* 0x0130 */ retl ! Result =
+/* 0x0134 */ nop
+/* 0x0138 0 */ .type conv_i32_to_d32,2
+/* 0x0138 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
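+!
+! The constant 1127219200 (0x43300000) above is the high word of the
+! IEEE-754 double 2^52. The loop uses the classic bit trick: storing
+! an unsigned 32-bit word x into the low half of 2^52 yields the
+! exact double 2^52 + x, so the fsubd against the same constant
+! leaves (double)x. A sketch of the same idea in C (illustrative
+! only; the union layout assumes big-endian SPARC):
+!
+!   double u32_to_double(unsigned int x) {
+!       union { double d; unsigned int w[2]; } u;
+!       u.w[0] = 0x43300000u;              /* high word of 2^52    */
+!       u.w[1] = x;                        /* low word: the payload */
+!       return u.d - 4503599627370496.0;   /* subtract 2^52        */
+!   }
+!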
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 4
+!
+! SUBROUTINE conv_i32_to_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16:
+/* 000000 */ save %sp,-104,%sp
+/* 0x0004 */ or %g0,%i2,%o0
+
+! 143 !}
+! 146 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 147 !{
+! 148 !int i;
+! 149 !unsigned int a;
+! 151 !#pragma pipeloop(0)
+! 152 ! for(i=0;i<len;i++)
+! 153 ! {
+! 154 ! a=i32[i];
+! 155 ! d16[2*i]=(double)(a&0xffff);
+! 156 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0008 156 */ sethi %hi(.L_const_seg_900000301),%g2
+ .L900000310:
+/* 0x000c */ call .+8
+/* 0x0010 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014 152 */ cmp %o0,0
+/* 0x0018 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x001c 152 */ ble,pt %icc,.L77000150
+/* 0x0020 */ add %g3,%o7,%o2
+/* 0x0024 */ sub %i2,1,%o5
+/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%o1
+/* 0x002c 152 */ sethi %hi(0xfc00),%o0
+/* 0x0030 */ ld [%o2+%o1],%o3
+/* 0x0034 */ add %o5,1,%g2
+/* 0x0038 */ or %g0,0,%g1
+/* 0x003c */ cmp %g2,3
+/* 0x0040 */ or %g0,%i1,%o7
+/* 0x0044 */ add %o0,1023,%o4
+/* 0x0048 */ or %g0,%i0,%g3
+/* 0x004c */ bl,pn %icc,.L77000154
+/* 0x0050 */ add %o7,4,%o0
+/* 0x0054 155 */ ldd [%o3],%f0
+/* 0x0058 156 */ or %g0,1,%g1
+/* 0x005c 154 */ ld [%o0-4],%o1
+/* 0x0060 0 */ or %g0,%o0,%o7
+/* 0x0064 155 */ and %o1,%o4,%o0
+ .L900000306:
+/* 0x0068 155 */ st %o0,[%sp+96]
+/* 0x006c 156 */ add %g1,1,%g1
+/* 0x0070 */ add %g3,16,%g3
+/* 0x0074 */ cmp %g1,%o5
+/* 0x0078 */ add %o7,4,%o7
+/* 0x007c 155 */ ld [%sp+96],%f3
+/* 0x0080 */ fmovs %f0,%f2
+/* 0x0084 */ fsubd %f2,%f0,%f2
+/* 0x0088 156 */ srl %o1,16,%o0
+/* 0x008c 155 */ std %f2,[%g3-16]
+/* 0x0090 156 */ st %o0,[%sp+92]
+/* 0x0094 */ ld [%sp+92],%f3
+/* 0x0098 154 */ ld [%o7-4],%o1
+/* 0x009c 156 */ fmovs %f0,%f2
+/* 0x00a0 */ fsubd %f2,%f0,%f2
+/* 0x00a4 155 */ and %o1,%o4,%o0
+/* 0x00a8 156 */ ble,pt %icc,.L900000306
+/* 0x00ac */ std %f2,[%g3-8]
+ .L900000309:
+/* 0x00b0 155 */ st %o0,[%sp+96]
+/* 0x00b4 */ fmovs %f0,%f2
+/* 0x00b8 156 */ add %g3,16,%g3
+/* 0x00bc */ srl %o1,16,%o0
+/* 0x00c0 155 */ ld [%sp+96],%f3
+/* 0x00c4 */ fsubd %f2,%f0,%f2
+/* 0x00c8 */ std %f2,[%g3-16]
+/* 0x00cc 156 */ st %o0,[%sp+92]
+/* 0x00d0 */ fmovs %f0,%f2
+/* 0x00d4 */ ld [%sp+92],%f3
+/* 0x00d8 */ fsubd %f2,%f0,%f0
+/* 0x00dc */ std %f0,[%g3-8]
+/* 0x00e0 */ ret ! Result =
+/* 0x00e4 */ restore %g0,%g0,%g0
+ .L77000154:
+/* 0x00e8 154 */ ld [%o7],%o0
+ .L900000311:
+/* 0x00ec 155 */ and %o0,%o4,%o1
+/* 0x00f0 */ st %o1,[%sp+96]
+/* 0x00f4 156 */ add %g1,1,%g1
+/* 0x00f8 155 */ ldd [%o3],%f0
+/* 0x00fc 156 */ srl %o0,16,%o0
+/* 0x0100 */ add %o7,4,%o7
+/* 0x0104 */ cmp %g1,%o5
+/* 0x0108 155 */ fmovs %f0,%f2
+/* 0x010c */ ld [%sp+96],%f3
+/* 0x0110 */ fsubd %f2,%f0,%f2
+/* 0x0114 */ std %f2,[%g3]
+/* 0x0118 156 */ st %o0,[%sp+92]
+/* 0x011c */ fmovs %f0,%f2
+/* 0x0120 */ ld [%sp+92],%f3
+/* 0x0124 */ fsubd %f2,%f0,%f0
+/* 0x0128 */ std %f0,[%g3+8]
+/* 0x012c */ add %g3,16,%g3
+/* 0x0130 */ ble,a,pt %icc,.L900000311
+/* 0x0134 */ ld [%o7],%o0
+ .L77000150:
+/* 0x0138 */ ret ! Result =
+/* 0x013c */ restore %g0,%g0,%g0
+/* 0x0140 0 */ .type conv_i32_to_d16,2
+/* 0x0140 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
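+!
+! The split performed above, checked on one word (a self-contained C
+! snippet, not part of this file):
+!
+!   #include <assert.h>
+!   int main(void) {
+!       unsigned int a = 0x12345678u;
+!       assert((double)(a & 0xffffu) == 22136.0); /* d16[2*i],   low 16  */
+!       assert((double)(a >> 16)     ==  4660.0); /* d16[2*i+1], high 16 */
+!       return 0;
+!   }
+!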
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 4
+/* 0x0008 */ .skip 16
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16:
+/* 000000 */ save %sp,-120,%sp
+ .L900000415:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+! 157 ! }
+! 158 !}
+! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 162 ! unsigned int *i32, int len)
+! 163 !{
+! 164 !int i = 0;
+! 165 !unsigned int a;
+! 167 !#pragma pipeloop(0)
+! 168 !#ifdef RF_INLINE_MACROS
+! 169 ! for(;i<len-3;i+=4)
+
+/* 0x000c 169 */ sub %i3,3,%g2
+/* 0x0010 */ cmp %g2,0
+/* 0x0014 163 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+! 170 ! {
+! 171 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 172 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0018 172 */ sethi %hi(Zero),%g2
+/* 0x001c 163 */ add %g4,%o7,%o4
+/* 0x0020 172 */ add %g2,%lo(Zero),%g2
+/* 0x0024 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0028 */ ld [%o4+%g2],%o1
+/* 0x002c */ sethi %hi(TwoTo16),%g4
+/* 0x0030 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x0034 */ ld [%o4+%g2],%o3
+/* 0x0038 164 */ or %g0,0,%g5
+/* 0x003c 172 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0040 */ ld [%o4+%g3],%o2
+/* 0x0044 163 */ or %g0,%i0,%i4
+/* 0x0048 169 */ or %g0,%i2,%o7
+/* 0x004c */ ble,pt %icc,.L900000418
+/* 0x0050 */ cmp %g5,%i3
+/* 0x0054 172 */ stx %o7,[%sp+104]
+/* 0x0058 169 */ sub %i3,4,%o5
+/* 0x005c */ or %g0,0,%g4
+/* 0x0060 */ or %g0,0,%g1
+ .L900000417:
+/* 0x0064 */ ldd [%o1],%f2
+/* 0x0068 172 */ add %i4,%g4,%g2
+/* 0x006c */ add %i1,%g1,%g3
+/* 0x0070 */ ldd [%o3],%f0
+/* 0x0074 */ add %g5,4,%g5
+/* 0x0078 */ fmovd %f2,%f14
+/* 0x007c */ ld [%o7],%f15
+/* 0x0080 */ cmp %g5,%o5
+/* 0x0084 */ fmovd %f2,%f10
+/* 0x0088 */ ld [%o7+4],%f11
+/* 0x008c */ add %o7,16,%o7
+/* 0x0090 */ ldx [%sp+104],%o0
+/* 0x0094 */ fmovd %f2,%f6
+/* 0x0098 */ stx %o7,[%sp+112]
+/* 0x009c */ fxtod %f14,%f14
+/* 0x00a0 */ ld [%o0+8],%f7
+/* 0x00a4 */ fxtod %f10,%f10
+/* 0x00a8 */ ld [%o0+12],%f3
+/* 0x00ac */ fxtod %f6,%f6
+/* 0x00b0 */ ldd [%o2],%f16
+/* 0x00b4 */ fmuld %f0,%f14,%f12
+/* 0x00b8 */ fxtod %f2,%f2
+/* 0x00bc */ fmuld %f0,%f10,%f8
+/* 0x00c0 */ std %f14,[%i4+%g4]
+/* 0x00c4 */ ldx [%sp+112],%o7
+/* 0x00c8 */ add %g4,32,%g4
+/* 0x00cc */ fmuld %f0,%f6,%f4
+/* 0x00d0 */ fdtox %f12,%f12
+/* 0x00d4 */ std %f10,[%g2+8]
+/* 0x00d8 */ fmuld %f0,%f2,%f0
+/* 0x00dc */ fdtox %f8,%f8
+/* 0x00e0 */ std %f6,[%g2+16]
+/* 0x00e4 */ std %f2,[%g2+24]
+/* 0x00e8 */ fdtox %f4,%f4
+/* 0x00ec */ fdtox %f0,%f0
+/* 0x00f0 */ fxtod %f12,%f12
+/* 0x00f4 */ std %f12,[%g3+8]
+/* 0x00f8 */ fxtod %f8,%f8
+/* 0x00fc */ std %f8,[%g3+24]
+/* 0x0100 */ fxtod %f4,%f4
+/* 0x0104 */ std %f4,[%g3+40]
+/* 0x0108 */ fxtod %f0,%f0
+/* 0x010c */ std %f0,[%g3+56]
+/* 0x0110 */ fmuld %f12,%f16,%f12
+/* 0x0114 */ fmuld %f8,%f16,%f8
+/* 0x0118 */ fmuld %f4,%f16,%f4
+/* 0x011c */ fsubd %f14,%f12,%f12
+/* 0x0120 */ std %f12,[%i1+%g1]
+/* 0x0124 */ fmuld %f0,%f16,%f0
+/* 0x0128 */ fsubd %f10,%f8,%f8
+/* 0x012c */ std %f8,[%g3+16]
+/* 0x0130 */ add %g1,64,%g1
+/* 0x0134 */ fsubd %f6,%f4,%f4
+/* 0x0138 */ std %f4,[%g3+32]
+/* 0x013c */ fsubd %f2,%f0,%f0
+/* 0x0140 */ std %f0,[%g3+48]
+/* 0x0144 */ ble,a,pt %icc,.L900000417
+/* 0x0148 */ stx %o7,[%sp+104]
+ .L77000159:
+
+! 173 ! }
+! 174 !#endif
+! 175 ! for(;i<len;i++)
+
+/* 0x014c 175 */ cmp %g5,%i3
+ .L900000418:
+/* 0x0150 175 */ bge,pt %icc,.L77000164
+/* 0x0154 */ nop
+
+! 176 ! {
+! 177 ! a=i32[i];
+! 178 ! d32[i]=(double)(i32[i]);
+! 179 ! d16[2*i]=(double)(a&0xffff);
+! 180 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2
+/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%o1
+/* 0x0160 175 */ sethi %hi(0xfc00),%o0
+/* 0x0164 */ ld [%o4+%o1],%o2
+/* 0x0168 */ sll %g5,2,%o3
+/* 0x016c */ sub %i3,%g5,%g3
+/* 0x0170 */ sll %g5,3,%g2
+/* 0x0174 */ add %o0,1023,%o4
+/* 0x0178 178 */ ldd [%o2],%f0
+/* 0x017c */ add %i2,%o3,%o0
+/* 0x0180 175 */ cmp %g3,3
+/* 0x0184 */ add %i4,%g2,%o3
+/* 0x0188 */ sub %i3,1,%o1
+/* 0x018c */ sll %g5,4,%g4
+/* 0x0190 */ bl,pn %icc,.L77000161
+/* 0x0194 */ add %i1,%g4,%o5
+/* 0x0198 178 */ ld [%o0],%f3
+/* 0x019c 180 */ add %o3,8,%o3
+/* 0x01a0 177 */ ld [%o0],%o7
+/* 0x01a4 180 */ add %o5,16,%o5
+/* 0x01a8 */ add %g5,1,%g5
+/* 0x01ac 178 */ fmovs %f0,%f2
+/* 0x01b0 180 */ add %o0,4,%o0
+/* 0x01b4 179 */ and %o7,%o4,%g1
+/* 0x01b8 178 */ fsubd %f2,%f0,%f2
+/* 0x01bc */ std %f2,[%o3-8]
+/* 0x01c0 180 */ srl %o7,16,%o7
+/* 0x01c4 179 */ st %g1,[%sp+96]
+/* 0x01c8 */ fmovs %f0,%f2
+/* 0x01cc */ ld [%sp+96],%f3
+/* 0x01d0 */ fsubd %f2,%f0,%f2
+/* 0x01d4 */ std %f2,[%o5-16]
+/* 0x01d8 180 */ st %o7,[%sp+92]
+/* 0x01dc */ fmovs %f0,%f2
+/* 0x01e0 */ ld [%sp+92],%f3
+/* 0x01e4 */ fsubd %f2,%f0,%f2
+/* 0x01e8 */ std %f2,[%o5-8]
+ .L900000411:
+/* 0x01ec 178 */ ld [%o0],%f3
+/* 0x01f0 180 */ add %g5,2,%g5
+/* 0x01f4 */ add %o5,32,%o5
+/* 0x01f8 177 */ ld [%o0],%o7
+/* 0x01fc 180 */ cmp %g5,%o1
+/* 0x0200 */ add %o3,16,%o3
+/* 0x0204 178 */ fmovs %f0,%f2
+/* 0x0208 */ fsubd %f2,%f0,%f2
+/* 0x020c */ std %f2,[%o3-16]
+/* 0x0210 179 */ and %o7,%o4,%g1
+/* 0x0214 */ st %g1,[%sp+96]
+/* 0x0218 */ ld [%sp+96],%f3
+/* 0x021c */ fmovs %f0,%f2
+/* 0x0220 */ fsubd %f2,%f0,%f2
+/* 0x0224 180 */ srl %o7,16,%o7
+/* 0x0228 179 */ std %f2,[%o5-32]
+/* 0x022c 180 */ st %o7,[%sp+92]
+/* 0x0230 */ ld [%sp+92],%f3
+/* 0x0234 */ fmovs %f0,%f2
+/* 0x0238 */ fsubd %f2,%f0,%f2
+/* 0x023c */ std %f2,[%o5-24]
+/* 0x0240 */ add %o0,4,%o0
+/* 0x0244 178 */ ld [%o0],%f3
+/* 0x0248 177 */ ld [%o0],%o7
+/* 0x024c 178 */ fmovs %f0,%f2
+/* 0x0250 */ fsubd %f2,%f0,%f2
+/* 0x0254 */ std %f2,[%o3-8]
+/* 0x0258 179 */ and %o7,%o4,%g1
+/* 0x025c */ st %g1,[%sp+96]
+/* 0x0260 */ ld [%sp+96],%f3
+/* 0x0264 */ fmovs %f0,%f2
+/* 0x0268 */ fsubd %f2,%f0,%f2
+/* 0x026c 180 */ srl %o7,16,%o7
+/* 0x0270 179 */ std %f2,[%o5-16]
+/* 0x0274 180 */ st %o7,[%sp+92]
+/* 0x0278 */ ld [%sp+92],%f3
+/* 0x027c */ fmovs %f0,%f2
+/* 0x0280 */ fsubd %f2,%f0,%f2
+/* 0x0284 */ std %f2,[%o5-8]
+/* 0x0288 */ bl,pt %icc,.L900000411
+/* 0x028c */ add %o0,4,%o0
+ .L900000414:
+/* 0x0290 180 */ cmp %g5,%i3
+/* 0x0294 */ bge,pn %icc,.L77000164
+/* 0x0298 */ nop
+ .L77000161:
+/* 0x029c 178 */ ld [%o0],%f3
+ .L900000416:
+/* 0x02a0 178 */ ldd [%o2],%f0
+/* 0x02a4 180 */ add %g5,1,%g5
+/* 0x02a8 177 */ ld [%o0],%o1
+/* 0x02ac 180 */ add %o0,4,%o0
+/* 0x02b0 */ cmp %g5,%i3
+/* 0x02b4 178 */ fmovs %f0,%f2
+/* 0x02b8 179 */ and %o1,%o4,%o7
+/* 0x02bc 178 */ fsubd %f2,%f0,%f2
+/* 0x02c0 */ std %f2,[%o3]
+/* 0x02c4 180 */ srl %o1,16,%o1
+/* 0x02c8 179 */ st %o7,[%sp+96]
+/* 0x02cc 180 */ add %o3,8,%o3
+/* 0x02d0 179 */ fmovs %f0,%f2
+/* 0x02d4 */ ld [%sp+96],%f3
+/* 0x02d8 */ fsubd %f2,%f0,%f2
+/* 0x02dc */ std %f2,[%o5]
+/* 0x02e0 180 */ st %o1,[%sp+92]
+/* 0x02e4 */ fmovs %f0,%f2
+/* 0x02e8 */ ld [%sp+92],%f3
+/* 0x02ec */ fsubd %f2,%f0,%f0
+/* 0x02f0 */ std %f0,[%o5+8]
+/* 0x02f4 */ add %o5,16,%o5
+/* 0x02f8 */ bl,a,pt %icc,.L900000416
+/* 0x02fc */ ld [%o0],%f3
+ .L77000164:
+/* 0x0300 */ ret ! Result =
+/* 0x0304 */ restore %g0,%g0,%g0
+/* 0x0308 0 */ .type conv_i32_to_d32_and_d16,2
+/* 0x0308 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
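+!
+! A portable rendering of the function above, straight from the quoted
+! source lines: the i16_to_d16_and_d32x4 fast path converts four words
+! per iteration but produces the same per-word result as the scalar
+! tail (the _ref name is made up for the sketch):
+!
+!   void conv_i32_to_d32_and_d16_ref(double *d32, double *d16,
+!                                    unsigned int *i32, int len)
+!   {
+!       for (int i = 0; i < len; i++) {
+!           unsigned int a = i32[i];
+!           d32[i]       = (double)a;
+!           d16[2*i]     = (double)(a & 0xffff);
+!           d16[2*i + 1] = (double)(a >> 16);
+!       }
+!   }
+!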
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+!
+! SUBROUTINE adjust_montf_result
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global adjust_montf_result
+ adjust_montf_result:
+/* 000000 */ or %g0,%o2,%g5
+
+! 181 ! }
+! 182 !}
+! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 186 !{
+! 187 !long long acc;
+! 188 !int i;
+! 190 ! if(i32[len]>0) i=-1;
+
+/* 0x0004 190 */ or %g0,-1,%g4
+/* 0x0008 */ sll %o2,2,%g1
+/* 0x000c */ ld [%o0+%g1],%g1
+/* 0x0010 */ cmp %g1,0
+/* 0x0014 */ bleu,pn %icc,.L77000175
+/* 0x0018 */ or %g0,%o1,%o3
+/* 0x001c */ ba .L900000511
+/* 0x0020 */ cmp %g4,0
+ .L77000175:
+
+! 191 ! else
+! 192 ! {
+! 193 ! for(i=len-1; i>=0; i--)
+
+/* 0x0024 193 */ sub %o2,1,%g4
+/* 0x0028 */ sll %g4,2,%g1
+/* 0x002c */ cmp %g4,0
+/* 0x0030 */ bl,pt %icc,.L900000511
+/* 0x0034 */ cmp %g4,0
+/* 0x0038 */ add %o1,%g1,%g2
+
+! 194 ! {
+! 195 ! if(i32[i]!=nint[i]) break;
+
+/* 0x003c 195 */ ld [%g2],%o5
+/* 0x0040 193 */ add %o0,%g1,%g3
+ .L900000510:
+/* 0x0044 195 */ ld [%g3],%o2
+/* 0x0048 */ sub %g4,1,%g1
+/* 0x004c */ sub %g2,4,%g2
+/* 0x0050 */ sub %g3,4,%g3
+/* 0x0054 */ cmp %o2,%o5
+/* 0x0058 */ bne,pn %icc,.L77000182
+/* 0x005c */ nop
+/* 0x0060 0 */ or %g0,%g1,%g4
+/* 0x0064 195 */ cmp %g1,0
+/* 0x0068 */ bge,a,pt %icc,.L900000510
+/* 0x006c */ ld [%g2],%o5
+ .L77000182:
+
+! 196 ! }
+! 197 ! }
+! 198 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0070 198 */ cmp %g4,0
+ .L900000511:
+/* 0x0074 198 */ bl,pn %icc,.L77000198
+/* 0x0078 */ sll %g4,2,%g2
+/* 0x007c */ ld [%o1+%g2],%g1
+/* 0x0080 */ ld [%o0+%g2],%g2
+/* 0x0084 */ cmp %g2,%g1
+/* 0x0088 */ bleu,pt %icc,.L77000191
+/* 0x008c */ nop
+ .L77000198:
+
+! 199 ! {
+! 200 ! acc=0;
+! 201 ! for(i=0;i<len;i++)
+
+/* 0x0090 201 */ cmp %g5,0
+/* 0x0094 */ ble,pt %icc,.L77000191
+/* 0x0098 */ nop
+/* 0x009c */ or %g0,%g5,%g1
+/* 0x00a0 198 */ or %g0,-1,%g2
+/* 0x00a4 */ srl %g2,0,%g3
+/* 0x00a8 */ sub %g5,1,%g4
+/* 0x00ac 200 */ or %g0,0,%g5
+/* 0x00b0 201 */ or %g0,0,%o5
+/* 0x00b4 198 */ or %g0,%o0,%o4
+/* 0x00b8 */ cmp %g1,3
+/* 0x00bc 201 */ bl,pn %icc,.L77000199
+/* 0x00c0 */ add %o0,8,%g1
+/* 0x00c4 */ add %o1,4,%g2
+
+! 202 ! {
+! 203 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00c8 203 */ ld [%o0],%o2
+/* 0x00cc */ ld [%o1],%o1
+/* 0x00d0 0 */ or %g0,%g1,%o4
+/* 0x00d4 */ or %g0,%g2,%o3
+/* 0x00d8 203 */ ld [%o0+4],%g1
+
+! 204 ! i32[i]=acc&0xffffffff;
+! 205 ! acc=acc>>32;
+
+/* 0x00dc 205 */ or %g0,2,%o5
+/* 0x00e0 201 */ sub %o2,%o1,%o2
+/* 0x00e4 */ or %g0,%o2,%g5
+/* 0x00e8 204 */ and %o2,%g3,%o2
+/* 0x00ec */ st %o2,[%o0]
+/* 0x00f0 205 */ srax %g5,32,%g5
+ .L900000505:
+/* 0x00f4 203 */ ld [%o3],%o2
+/* 0x00f8 205 */ add %o5,1,%o5
+/* 0x00fc */ add %o3,4,%o3
+/* 0x0100 */ cmp %o5,%g4
+/* 0x0104 */ add %o4,4,%o4
+/* 0x0108 201 */ sub %g1,%o2,%g1
+/* 0x010c */ add %g1,%g5,%g5
+/* 0x0110 204 */ and %g5,%g3,%o2
+/* 0x0114 203 */ ld [%o4-4],%g1
+/* 0x0118 204 */ st %o2,[%o4-8]
+/* 0x011c 205 */ ble,pt %icc,.L900000505
+/* 0x0120 */ srax %g5,32,%g5
+ .L900000508:
+/* 0x0124 203 */ ld [%o3],%g2
+/* 0x0128 201 */ sub %g1,%g2,%g1
+/* 0x012c */ add %g1,%g5,%g1
+/* 0x0130 204 */ and %g1,%g3,%g2
+/* 0x0134 */ retl ! Result =
+/* 0x0138 */ st %g2,[%o4-4]
+ .L77000199:
+/* 0x013c 203 */ ld [%o4],%g1
+ .L900000509:
+/* 0x0140 203 */ ld [%o3],%g2
+/* 0x0144 */ add %g5,%g1,%g1
+/* 0x0148 205 */ add %o5,1,%o5
+/* 0x014c */ add %o3,4,%o3
+/* 0x0150 */ cmp %o5,%g4
+/* 0x0154 203 */ sub %g1,%g2,%g1
+/* 0x0158 204 */ and %g1,%g3,%g2
+/* 0x015c */ st %g2,[%o4]
+/* 0x0160 205 */ add %o4,4,%o4
+/* 0x0164 */ srax %g1,32,%g5
+/* 0x0168 */ ble,a,pt %icc,.L900000509
+/* 0x016c */ ld [%o4],%g1
+ .L77000191:
+/* 0x0170 */ retl ! Result =
+/* 0x0174 */ nop
+/* 0x0178 0 */ .type adjust_montf_result,2
+/* 0x0178 */ .size adjust_montf_result,(.-adjust_montf_result)
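+!
+! Reference form of the conditional subtraction above: when the
+! Montgomery result t (in i32) is at least the modulus n (in nint),
+! replace it with t - n; for a standard Montgomery reduction t < 2n,
+! so one subtraction suffices (the _ref name is made up):
+!
+!   void adjust_montf_result_ref(unsigned int *i32, unsigned int *nint,
+!                                int len)
+!   {
+!       int i;
+!       if (i32[len] > 0)                     /* top word set: t >= n */
+!           i = -1;
+!       else
+!           for (i = len - 1; i >= 0; i--)
+!               if (i32[i] != nint[i]) break; /* first differing word */
+!       if (i < 0 || i32[i] > nint[i]) {
+!           long long acc = 0;
+!           for (i = 0; i < len; i++) {
+!               acc += (unsigned long long)i32[i]
+!                    - (unsigned long long)nint[i];
+!               i32[i] = acc & 0xffffffff;
+!               acc >>= 32;                   /* borrow rides along as -1 */
+!           }
+!       }
+!   }
+!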
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+/* 000000 */ .skip 16
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv:
+/* 000000 */ save %sp,-144,%sp
+ .L900000646:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+
+! 206 ! }
+! 207 ! }
+! 208 !}
+! 213 !/*
+! 214 !** the lengths of the input arrays should be at least the following:
+! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 216 !** all of them should be different from one another
+! 217 !**
+! 218 !*/
+! 219 !void mont_mulf_noconv(unsigned int *result,
+! 220 ! double *dm1, double *dm2, double *dt,
+! 221 ! double *dn, unsigned int *nint,
+! 222 ! int nlen, double dn0)
+! 223 !{
+! 224 ! int i, j, jj;
+! 225 ! int tmp;
+! 226 ! double digit, m2j, nextm2j, a, b;
+! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 229 ! pdm1=&(dm1[0]);
+! 230 ! pdm2=&(dm2[0]);
+! 231 ! pdn=&(dn[0]);
+! 232 ! pdm2[2*nlen]=Zero;
+
+/* 0x000c 232 */ ld [%fp+92],%o1
+/* 0x0010 */ sethi %hi(Zero),%g2
+/* 0x0014 223 */ ldd [%fp+96],%f2
+/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+/* 0x001c 232 */ add %g2,%lo(Zero),%g2
+/* 0x0020 223 */ st %i0,[%fp+68]
+/* 0x0024 */ add %g5,%o7,%o3
+
+! 234 ! if (nlen!=16)
+! 235 ! {
+! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 238 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0028 239 */ sethi %hi(TwoToMinus16),%g3
+/* 0x002c 232 */ ld [%o3+%g2],%l0
+/* 0x0030 239 */ sethi %hi(TwoTo16),%g4
+/* 0x0034 223 */ or %g0,%i2,%o2
+/* 0x0038 */ fmovd %f2,%f16
+/* 0x003c */ st %i5,[%fp+88]
+/* 0x0040 239 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x0044 223 */ or %g0,%i1,%i2
+/* 0x0048 232 */ ldd [%l0],%f0
+/* 0x004c 239 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0050 223 */ or %g0,%i3,%o0
+/* 0x0054 232 */ sll %o1,4,%g4
+/* 0x0058 239 */ ld [%o3+%g2],%g5
+/* 0x005c 223 */ or %g0,%i3,%i1
+/* 0x0060 239 */ ld [%o3+%g3],%g1
+/* 0x0064 232 */ or %g0,%o1,%i0
+/* 0x0068 */ or %g0,%o2,%i3
+/* 0x006c 234 */ cmp %o1,16
+/* 0x0070 */ be,pn %icc,.L77000279
+/* 0x0074 */ std %f0,[%o2+%g4]
+/* 0x0078 236 */ sll %o1,2,%g2
+/* 0x007c */ or %g0,%o0,%o3
+/* 0x0080 232 */ sll %o1,1,%o1
+/* 0x0084 236 */ add %g2,2,%o2
+/* 0x0088 */ cmp %o2,0
+/* 0x008c */ ble,a,pt %icc,.L900000660
+/* 0x0090 */ ldd [%i2],%f0
+
+! 241 ! pdtj=&(dt[0]);
+! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 243 ! {
+! 244 ! m2j=pdm2[j];
+! 245 ! a=pdtj[0]+pdn[0]*digit;
+! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 247 ! pdtj[1]=b;
+! 249 !#pragma pipeloop(0)
+! 250 ! for(i=1;i<nlen;i++)
+! 251 ! {
+! 252 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 253 ! }
+! 254 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 255 !
+! 256 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 257 ! }
+! 258 ! }
+! 259 ! else
+! 260 ! {
+! 261 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 263 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 264 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 265 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 266 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 267 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 268 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 269 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 270 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 271 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 272 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 273 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 275 ! pdn_0=pdn[0];
+! 276 ! pdm1_0=pdm1[0];
+! 278 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 279 ! pdtj=&(dt[0]);
+! 281 ! for(j=0;j<32;j++,pdtj++)
+
+/* 0x0094 281 */ add %g2,2,%o0
+/* 0x0098 236 */ add %g2,1,%o2
+/* 0x009c 281 */ cmp %o0,3
+/* 0x00a0 */ bl,pn %icc,.L77000280
+/* 0x00a4 */ or %g0,1,%o0
+/* 0x00a8 */ add %o3,8,%o3
+/* 0x00ac */ or %g0,1,%o4
+/* 0x00b0 */ std %f0,[%o3-8]
+ .L900000630:
+/* 0x00b4 */ std %f0,[%o3]
+/* 0x00b8 */ add %o4,2,%o4
+/* 0x00bc */ add %o3,16,%o3
+/* 0x00c0 */ cmp %o4,%g2
+/* 0x00c4 */ ble,pt %icc,.L900000630
+/* 0x00c8 */ std %f0,[%o3-8]
+ .L900000633:
+/* 0x00cc */ cmp %o4,%o2
+/* 0x00d0 */ bg,pn %icc,.L77000285
+/* 0x00d4 */ add %o4,1,%o0
+ .L77000280:
+/* 0x00d8 */ std %f0,[%o3]
+ .L900000659:
+/* 0x00dc */ ldd [%l0],%f0
+/* 0x00e0 */ cmp %o0,%o2
+/* 0x00e4 */ add %o3,8,%o3
+/* 0x00e8 */ add %o0,1,%o0
+/* 0x00ec */ ble,a,pt %icc,.L900000659
+/* 0x00f0 */ std %f0,[%o3]
+ .L77000285:
+/* 0x00f4 238 */ ldd [%i2],%f0
+ .L900000660:
+/* 0x00f8 238 */ ldd [%i3],%f2
+/* 0x00fc */ add %o1,1,%o2
+/* 0x0100 242 */ cmp %o1,0
+/* 0x0104 */ sll %o2,1,%o0
+/* 0x0108 */ sub %o1,1,%o1
+/* 0x010c 238 */ fmuld %f0,%f2,%f0
+/* 0x0110 */ std %f0,[%i1]
+/* 0x0114 0 */ or %g0,0,%l1
+/* 0x0118 */ ldd [%l0],%f6
+/* 0x011c */ or %g0,0,%g4
+/* 0x0120 */ or %g0,%o2,%i5
+/* 0x0124 */ ldd [%g5],%f2
+/* 0x0128 */ or %g0,%o1,%g3
+/* 0x012c */ or %g0,%o0,%o3
+/* 0x0130 */ fdtox %f0,%f0
+/* 0x0134 */ ldd [%g1],%f4
+/* 0x0138 246 */ add %i3,8,%o4
+/* 0x013c */ or %g0,0,%l2
+/* 0x0140 */ or %g0,%i1,%o5
+/* 0x0144 */ sub %i0,1,%o7
+/* 0x0148 */ fmovs %f6,%f0
+/* 0x014c */ fxtod %f0,%f0
+/* 0x0150 239 */ fmuld %f0,%f16,%f0
+/* 0x0154 */ fmuld %f0,%f2,%f2
+/* 0x0158 */ fdtox %f2,%f2
+/* 0x015c */ fxtod %f2,%f2
+/* 0x0160 */ fmuld %f2,%f4,%f2
+/* 0x0164 */ fsubd %f0,%f2,%f22
+/* 0x0168 242 */ ble,pt %icc,.L900000653
+/* 0x016c */ sll %i0,4,%g2
+/* 0x0170 246 */ ldd [%i4],%f0
+ .L900000654:
+/* 0x0174 246 */ fmuld %f0,%f22,%f8
+/* 0x0178 */ ldd [%i2],%f0
+/* 0x017c 250 */ cmp %i0,1
+/* 0x0180 246 */ ldd [%o4+%l2],%f6
+/* 0x0184 */ add %i2,8,%o0
+/* 0x0188 250 */ or %g0,1,%o1
+/* 0x018c 246 */ ldd [%o5],%f2
+/* 0x0190 */ add %o5,16,%l3
+/* 0x0194 */ fmuld %f0,%f6,%f6
+/* 0x0198 */ ldd [%g5],%f4
+/* 0x019c */ faddd %f2,%f8,%f2
+/* 0x01a0 */ ldd [%o5+8],%f0
+/* 0x01a4 244 */ ldd [%i3+%l2],%f20
+/* 0x01a8 246 */ faddd %f0,%f6,%f0
+/* 0x01ac */ fmuld %f2,%f4,%f2
+/* 0x01b0 */ faddd %f0,%f2,%f18
+/* 0x01b4 247 */ std %f18,[%o5+8]
+/* 0x01b8 250 */ ble,pt %icc,.L900000658
+/* 0x01bc */ srl %g4,31,%g2
+/* 0x01c0 */ cmp %o7,7
+/* 0x01c4 246 */ add %i4,8,%g2
+/* 0x01c8 250 */ bl,pn %icc,.L77000284
+/* 0x01cc */ add %g2,24,%o2
+/* 0x01d0 252 */ ldd [%o0+24],%f12
+/* 0x01d4 */ add %o5,48,%l3
+/* 0x01d8 */ ldd [%o0],%f2
+/* 0x01dc 0 */ or %g0,%o2,%g2
+/* 0x01e0 250 */ sub %o7,2,%o2
+/* 0x01e4 252 */ ldd [%g2-24],%f0
+/* 0x01e8 */ or %g0,5,%o1
+/* 0x01ec */ ldd [%o0+8],%f6
+/* 0x01f0 */ fmuld %f2,%f20,%f2
+/* 0x01f4 */ ldd [%o0+16],%f14
+/* 0x01f8 */ fmuld %f0,%f22,%f4
+/* 0x01fc */ add %o0,32,%o0
+/* 0x0200 */ ldd [%g2-16],%f8
+/* 0x0204 */ fmuld %f6,%f20,%f10
+/* 0x0208 */ ldd [%o5+16],%f0
+/* 0x020c */ ldd [%g2-8],%f6
+/* 0x0210 */ faddd %f2,%f4,%f4
+/* 0x0214 */ ldd [%o5+32],%f2
+ .L900000642:
+/* 0x0218 252 */ ldd [%g2],%f24
+/* 0x021c */ add %o1,3,%o1
+/* 0x0220 */ add %g2,24,%g2
+/* 0x0224 */ fmuld %f8,%f22,%f8
+/* 0x0228 */ ldd [%l3],%f28
+/* 0x022c */ cmp %o1,%o2
+/* 0x0230 */ add %o0,24,%o0
+/* 0x0234 */ ldd [%o0-24],%f26
+/* 0x0238 */ faddd %f0,%f4,%f0
+/* 0x023c */ add %l3,48,%l3
+/* 0x0240 */ faddd %f10,%f8,%f10
+/* 0x0244 */ fmuld %f14,%f20,%f4
+/* 0x0248 */ std %f0,[%l3-80]
+/* 0x024c */ ldd [%g2-16],%f8
+/* 0x0250 */ fmuld %f6,%f22,%f6
+/* 0x0254 */ ldd [%l3-32],%f0
+/* 0x0258 */ ldd [%o0-16],%f14
+/* 0x025c */ faddd %f2,%f10,%f2
+/* 0x0260 */ faddd %f4,%f6,%f10
+/* 0x0264 */ fmuld %f12,%f20,%f4
+/* 0x0268 */ std %f2,[%l3-64]
+/* 0x026c */ ldd [%g2-8],%f6
+/* 0x0270 */ fmuld %f24,%f22,%f24
+/* 0x0274 */ ldd [%l3-16],%f2
+/* 0x0278 */ ldd [%o0-8],%f12
+/* 0x027c */ faddd %f28,%f10,%f10
+/* 0x0280 */ std %f10,[%l3-48]
+/* 0x0284 */ fmuld %f26,%f20,%f10
+/* 0x0288 */ ble,pt %icc,.L900000642
+/* 0x028c */ faddd %f4,%f24,%f4
+ .L900000645:
+/* 0x0290 252 */ fmuld %f8,%f22,%f28
+/* 0x0294 */ ldd [%g2],%f24
+/* 0x0298 */ faddd %f0,%f4,%f26
+/* 0x029c */ fmuld %f12,%f20,%f8
+/* 0x02a0 */ add %l3,32,%l3
+/* 0x02a4 */ cmp %o1,%o7
+/* 0x02a8 */ fmuld %f14,%f20,%f14
+/* 0x02ac */ ldd [%l3-32],%f4
+/* 0x02b0 */ add %g2,8,%g2
+/* 0x02b4 */ faddd %f10,%f28,%f12
+/* 0x02b8 */ fmuld %f6,%f22,%f6
+/* 0x02bc */ ldd [%l3-16],%f0
+/* 0x02c0 */ fmuld %f24,%f22,%f10
+/* 0x02c4 */ std %f26,[%l3-64]
+/* 0x02c8 */ faddd %f2,%f12,%f2
+/* 0x02cc */ std %f2,[%l3-48]
+/* 0x02d0 */ faddd %f14,%f6,%f6
+/* 0x02d4 */ faddd %f8,%f10,%f2
+/* 0x02d8 */ faddd %f4,%f6,%f4
+/* 0x02dc */ std %f4,[%l3-32]
+/* 0x02e0 */ faddd %f0,%f2,%f0
+/* 0x02e4 */ bg,pn %icc,.L77000213
+/* 0x02e8 */ std %f0,[%l3-16]
+ .L77000284:
+/* 0x02ec 252 */ ldd [%o0],%f0
+ .L900000657:
+/* 0x02f0 252 */ ldd [%g2],%f4
+/* 0x02f4 */ fmuld %f0,%f20,%f2
+/* 0x02f8 */ add %o1,1,%o1
+/* 0x02fc */ ldd [%l3],%f0
+/* 0x0300 */ add %o0,8,%o0
+/* 0x0304 */ add %g2,8,%g2
+/* 0x0308 */ fmuld %f4,%f22,%f4
+/* 0x030c */ cmp %o1,%o7
+/* 0x0310 */ faddd %f2,%f4,%f2
+/* 0x0314 */ faddd %f0,%f2,%f0
+/* 0x0318 */ std %f0,[%l3]
+/* 0x031c */ add %l3,16,%l3
+/* 0x0320 */ ble,a,pt %icc,.L900000657
+/* 0x0324 */ ldd [%o0],%f0
+ .L77000213:
+/* 0x0328 */ srl %g4,31,%g2
+ .L900000658:
+/* 0x032c 254 */ cmp %l1,30
+/* 0x0330 */ bne,a,pt %icc,.L900000656
+/* 0x0334 */ fdtox %f18,%f0
+/* 0x0338 */ add %g4,%g2,%g2
+/* 0x033c */ sra %g2,1,%o0
+/* 0x0340 281 */ ldd [%l0],%f0
+/* 0x0344 */ sll %i5,1,%o2
+/* 0x0348 */ add %o0,1,%g2
+/* 0x034c */ sll %g2,1,%o0
+/* 0x0350 254 */ sub %o2,1,%o2
+/* 0x0354 281 */ fmovd %f0,%f2
+/* 0x0358 */ sll %g2,4,%o1
+/* 0x035c */ cmp %o0,%o3
+/* 0x0360 */ bge,pt %icc,.L77000215
+/* 0x0364 */ or %g0,0,%l1
+/* 0x0368 254 */ add %i1,%o1,%o1
+/* 0x036c 281 */ ldd [%o1],%f6
+ .L900000655:
+/* 0x0370 */ fdtox %f6,%f10
+/* 0x0374 */ ldd [%o1+8],%f4
+/* 0x0378 */ add %o0,2,%o0
+/* 0x037c */ ldd [%l0],%f12
+/* 0x0380 */ fdtox %f6,%f6
+/* 0x0384 */ cmp %o0,%o2
+/* 0x0388 */ fdtox %f4,%f8
+/* 0x038c */ fdtox %f4,%f4
+/* 0x0390 */ fmovs %f12,%f10
+/* 0x0394 */ fmovs %f12,%f8
+/* 0x0398 */ fxtod %f10,%f10
+/* 0x039c */ fxtod %f8,%f8
+/* 0x03a0 */ faddd %f10,%f2,%f2
+/* 0x03a4 */ std %f2,[%o1]
+/* 0x03a8 */ faddd %f8,%f0,%f0
+/* 0x03ac */ std %f0,[%o1+8]
+/* 0x03b0 */ add %o1,16,%o1
+/* 0x03b4 */ fitod %f6,%f2
+/* 0x03b8 */ fitod %f4,%f0
+/* 0x03bc */ ble,a,pt %icc,.L900000655
+/* 0x03c0 */ ldd [%o1],%f6
+ .L77000233:
+/* 0x03c4 */ or %g0,0,%l1
+ .L77000215:
+/* 0x03c8 */ fdtox %f18,%f0
+ .L900000656:
+/* 0x03cc */ ldd [%l0],%f6
+/* 0x03d0 256 */ add %g4,1,%g4
+/* 0x03d4 */ add %l2,8,%l2
+/* 0x03d8 */ ldd [%g5],%f2
+/* 0x03dc */ add %l1,1,%l1
+/* 0x03e0 */ add %o5,8,%o5
+/* 0x03e4 */ fmovs %f6,%f0
+/* 0x03e8 */ ldd [%g1],%f4
+/* 0x03ec */ cmp %g4,%g3
+/* 0x03f0 */ fxtod %f0,%f0
+/* 0x03f4 */ fmuld %f0,%f16,%f0
+/* 0x03f8 */ fmuld %f0,%f2,%f2
+/* 0x03fc */ fdtox %f2,%f2
+/* 0x0400 */ fxtod %f2,%f2
+/* 0x0404 */ fmuld %f2,%f4,%f2
+/* 0x0408 */ fsubd %f0,%f2,%f22
+/* 0x040c */ ble,a,pt %icc,.L900000654
+/* 0x0410 */ ldd [%i4],%f0
+ .L900000629:
+/* 0x0414 256 */ ba .L900000653
+/* 0x0418 */ sll %i0,4,%g2
+ .L77000279:
+/* 0x041c 261 */ ldd [%o2],%f6
+/* 0x0420 279 */ or %g0,%o0,%o4
+/* 0x0424 281 */ or %g0,0,%o3
+/* 0x0428 261 */ ldd [%i2],%f4
+/* 0x042c 273 */ std %f0,[%o0+8]
+/* 0x0430 */ std %f0,[%o0+16]
+/* 0x0434 261 */ fmuld %f4,%f6,%f4
+/* 0x0438 */ std %f4,[%o0]
+/* 0x043c 273 */ std %f0,[%o0+24]
+/* 0x0440 */ std %f0,[%o0+32]
+/* 0x0444 */ fdtox %f4,%f4
+/* 0x0448 */ std %f0,[%o0+40]
+/* 0x044c */ std %f0,[%o0+48]
+/* 0x0450 */ std %f0,[%o0+56]
+/* 0x0454 */ std %f0,[%o0+64]
+/* 0x0458 */ std %f0,[%o0+72]
+/* 0x045c */ std %f0,[%o0+80]
+/* 0x0460 */ std %f0,[%o0+88]
+/* 0x0464 */ std %f0,[%o0+96]
+/* 0x0468 */ std %f0,[%o0+104]
+/* 0x046c */ std %f0,[%o0+112]
+/* 0x0470 */ std %f0,[%o0+120]
+/* 0x0474 */ std %f0,[%o0+128]
+/* 0x0478 */ std %f0,[%o0+136]
+/* 0x047c */ std %f0,[%o0+144]
+/* 0x0480 */ std %f0,[%o0+152]
+/* 0x0484 */ std %f0,[%o0+160]
+/* 0x0488 */ std %f0,[%o0+168]
+/* 0x048c */ fmovs %f0,%f4
+/* 0x0490 */ std %f0,[%o0+176]
+/* 0x0494 281 */ or %g0,0,%o1
+/* 0x0498 273 */ std %f0,[%o0+184]
+/* 0x049c */ fxtod %f4,%f4
+/* 0x04a0 */ std %f0,[%o0+192]
+/* 0x04a4 */ std %f0,[%o0+200]
+/* 0x04a8 */ std %f0,[%o0+208]
+/* 0x04ac 278 */ fmuld %f4,%f2,%f2
+/* 0x04b0 273 */ std %f0,[%o0+216]
+/* 0x04b4 */ std %f0,[%o0+224]
+/* 0x04b8 */ std %f0,[%o0+232]
+/* 0x04bc */ std %f0,[%o0+240]
+/* 0x04c0 */ std %f0,[%o0+248]
+/* 0x04c4 */ std %f0,[%o0+256]
+/* 0x04c8 */ std %f0,[%o0+264]
+/* 0x04cc */ std %f0,[%o0+272]
+/* 0x04d0 */ std %f0,[%o0+280]
+/* 0x04d4 */ std %f0,[%o0+288]
+/* 0x04d8 */ std %f0,[%o0+296]
+/* 0x04dc */ std %f0,[%o0+304]
+/* 0x04e0 */ std %f0,[%o0+312]
+/* 0x04e4 */ std %f0,[%o0+320]
+/* 0x04e8 */ std %f0,[%o0+328]
+/* 0x04ec */ std %f0,[%o0+336]
+/* 0x04f0 */ std %f0,[%o0+344]
+/* 0x04f4 */ std %f0,[%o0+352]
+/* 0x04f8 */ std %f0,[%o0+360]
+/* 0x04fc */ std %f0,[%o0+368]
+/* 0x0500 */ std %f0,[%o0+376]
+/* 0x0504 */ std %f0,[%o0+384]
+/* 0x0508 */ std %f0,[%o0+392]
+/* 0x050c */ std %f0,[%o0+400]
+/* 0x0510 */ std %f0,[%o0+408]
+/* 0x0514 */ std %f0,[%o0+416]
+/* 0x0518 */ std %f0,[%o0+424]
+/* 0x051c */ std %f0,[%o0+432]
+/* 0x0520 */ std %f0,[%o0+440]
+/* 0x0524 */ std %f0,[%o0+448]
+/* 0x0528 */ std %f0,[%o0+456]
+/* 0x052c */ std %f0,[%o0+464]
+/* 0x0530 */ std %f0,[%o0+472]
+/* 0x0534 */ std %f0,[%o0+480]
+/* 0x0538 */ std %f0,[%o0+488]
+/* 0x053c */ std %f0,[%o0+496]
+/* 0x0540 */ std %f0,[%o0+504]
+/* 0x0544 */ std %f0,[%o0+512]
+/* 0x0548 */ std %f0,[%o0+520]
+/* 0x054c */ ldd [%g5],%f0
+/* 0x0550 */ ldd [%g1],%f8
+/* 0x0554 */ fmuld %f2,%f0,%f6
+/* 0x0558 275 */ ldd [%i4],%f4
+/* 0x055c 276 */ ldd [%i2],%f0
+/* 0x0560 */ fdtox %f6,%f6
+/* 0x0564 */ fxtod %f6,%f6
+/* 0x0568 */ fmuld %f6,%f8,%f6
+/* 0x056c */ fsubd %f2,%f6,%f2
+/* 0x0570 286 */ fmuld %f4,%f2,%f12
+
+! 282 ! {
+! 284 ! m2j=pdm2[j];
+! 285 ! a=pdtj[0]+pdn_0*digit;
+! 286 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+! 287 ! pdtj[1]=b;
+! 289 ! /**** this loop will be fully unrolled:
+! 290 ! for(i=1;i<16;i++)
+! 291 ! {
+! 292 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 293 ! }
+! 294 ! *************************************/
+! 295 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 296 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 297 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 298 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 299 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 300 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 301 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 302 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 303 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 304 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 305 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 306 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 307 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 308 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 309 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 310 ! /* no need for cleanup, cannot overflow */
+! 311 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+ fmovd %f2,%f0 ! hand modified
+ fmovd %f16,%f18 ! hand modified
+ ldd [%i4],%f2
+ ldd [%o4],%f8
+ ldd [%i2],%f10
+ ldd [%g5],%f14 ! hand modified
+ ldd [%g1],%f16 ! hand modified
+ ldd [%i3],%f24
+
+ ldd [%i2+8],%f26
+ ldd [%i2+16],%f40
+ ldd [%i2+48],%f46
+ ldd [%i2+56],%f30
+ ldd [%i2+64],%f54
+ ldd [%i2+104],%f34
+ ldd [%i2+112],%f58
+
+ ldd [%i4+8],%f28
+ ldd [%i4+104],%f38
+ ldd [%i4+112],%f60
+
+ .L99999999: !1
+ ldd [%i2+24],%f32
+ fmuld %f0,%f2,%f4 !2
+ ldd [%i4+24],%f36
+ fmuld %f26,%f24,%f20 !3
+ ldd [%i2+40],%f42
+ fmuld %f28,%f0,%f22 !4
+ ldd [%i4+40],%f44
+ fmuld %f32,%f24,%f32 !5
+ ldd [%i3+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36 !6
+ add %i3,8,%i3
+ ldd [%i4+56],%f50
+ fmuld %f42,%f24,%f42 !7
+ ldd [%i2+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44 !8
+ ldd [%o4+16],%f22
+ fmuld %f10,%f6,%f12 !9
+ ldd [%i4+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4 !10
+ ldd [%o4+48],%f36
+ fmuld %f30,%f24,%f48 !11
+ ldd [%o4+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !12
+ std %f20,[%o4+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52 !13
+ ldd [%o4+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56 !14
+ ldd [%i2+88],%f20
+ faddd %f32,%f36,%f32 !15
+ ldd [%i4+88],%f22
+ faddd %f48,%f50,%f48 !16
+ ldd [%o4+112],%f50
+ faddd %f52,%f56,%f52 !17
+ ldd [%o4+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20 !18
+ std %f32,[%o4+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22 !19
+ std %f42,[%o4+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32 !20
+ std %f48,[%o4+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36 !21
+ ldd [%i2+120],%f42
+ fdtox %f8,%f4 !22
+ std %f52,[%o4+144]
+ faddd %f20,%f22,%f20 !23
+ ldd [%i4+120],%f44 !24
+ ldd [%o4+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42 !25
+ ldd [%i4+16],%f50
+ fmovs %f17,%f4 !26
+ ldd [%i2+32],%f52
+ fmuld %f44,%f0,%f44 !27
+ ldd [%i4+32],%f56
+ fmuld %f40,%f24,%f48 !28
+ ldd [%o4+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !29
+ std %f20,[%o4+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52 !30
+ ldd [%i4+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56 !31
+ ldd [%o4+240],%f44
+ faddd %f32,%f36,%f32 !32
+ std %f32,[%o4+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20 !33
+ ldd [%o4+32],%f50
+ fmuld %f4,%f18,%f12 !34
+ ldd [%i4+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22 !35
+ ldd [%o4+64],%f56
+ faddd %f42,%f44,%f42 !36
+ std %f42,[%o4+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32 !37
+ std %f48,[%o4+32]
+ fmuld %f12,%f14,%f4 !38
+ ldd [%i2+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36 !39
+ ldd [%i4+80],%f44
+ faddd %f20,%f22,%f20 !40
+ ldd [%i2+96],%f48
+ fmuld %f58,%f24,%f52 !41
+ ldd [%i4+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42 !42
+ std %f56,[%o4+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44 !43
+ ldd [%o4+96],%f22
+ fmuld %f48,%f24,%f48 !44
+ ldd [%o4+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50 !45
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56 !46
+ add %o4,8,%o4
+ faddd %f42,%f44,%f42 !47
+ ldd [%o4+160-8],%f44
+ faddd %f20,%f22,%f20 !48
+ std %f20,[%o4+96-8]
+ faddd %f48,%f50,%f48 !49
+ ldd [%o4+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4 !50
+ ldd [%o4+224-8],%f56
+ faddd %f32,%f36,%f32 !51
+ std %f32,[%o4+128-8]
+ faddd %f42,%f44,%f42 !52
+ add %o3,1,%o3
+ std %f42,[%o4+160-8]
+ faddd %f48,%f50,%f48 !53
+ cmp %o3,31
+ std %f48,[%o4+192-8]
+ fsubd %f12,%f4,%f0 !54
+ faddd %f52,%f56,%f52
+ ble,pt %icc,.L99999999
+ std %f52,[%o4+224-8] !55
+ std %f8,[%o4]
+
+! 312 ! }
+! 313 ! }
+! 315 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x07c8 315 */ sll %i0,4,%g2
+ .L900000653:
+/* 0x07cc 315 */ add %i1,%g2,%i1
+/* 0x07d0 242 */ ld [%fp+68],%o0
+/* 0x07d4 315 */ or %g0,0,%o4
+/* 0x07d8 */ ldd [%i1],%f0
+/* 0x07dc */ or %g0,0,%g5
+/* 0x07e0 */ cmp %i0,0
+/* 0x07e4 242 */ or %g0,%o0,%o3
+/* 0x07e8 311 */ sub %i0,1,%g1
+/* 0x07ec 315 */ fdtox %f0,%f0
+/* 0x07f0 */ std %f0,[%sp+120]
+/* 0x07f4 311 */ sethi %hi(0xfc00),%o1
+/* 0x07f8 */ add %g1,1,%g3
+/* 0x07fc */ or %g0,%o0,%g4
+/* 0x0800 315 */ ldd [%i1+8],%f0
+/* 0x0804 */ add %o1,1023,%o1
+/* 0x0808 */ fdtox %f0,%f0
+/* 0x080c */ std %f0,[%sp+112]
+/* 0x0810 */ ldx [%sp+112],%o5
+/* 0x0814 */ ldx [%sp+120],%o7
+/* 0x0818 */ ble,pt %icc,.L900000651
+/* 0x081c */ sethi %hi(0xfc00),%g2
+/* 0x0820 311 */ or %g0,-1,%g2
+/* 0x0824 315 */ cmp %g3,3
+/* 0x0828 311 */ srl %g2,0,%o2
+/* 0x082c 315 */ bl,pn %icc,.L77000287
+/* 0x0830 */ or %g0,%i1,%g2
+/* 0x0834 */ ldd [%i1+16],%f0
+/* 0x0838 */ and %o5,%o1,%o0
+/* 0x083c */ add %i1,16,%g2
+/* 0x0840 */ sllx %o0,16,%g3
+/* 0x0844 */ and %o7,%o2,%o0
+/* 0x0848 */ fdtox %f0,%f0
+/* 0x084c */ std %f0,[%sp+104]
+/* 0x0850 */ add %o0,%g3,%o4
+/* 0x0854 */ ldd [%i1+24],%f2
+/* 0x0858 */ srax %o5,16,%o0
+/* 0x085c */ add %o3,4,%g4
+/* 0x0860 */ stx %o0,[%sp+128]
+/* 0x0864 */ and %o4,%o2,%o0
+/* 0x0868 */ stx %o0,[%sp+112]
+/* 0x086c */ srax %o4,32,%o0
+/* 0x0870 */ fdtox %f2,%f0
+/* 0x0874 */ stx %o0,[%sp+136]
+/* 0x0878 */ srax %o7,32,%o4
+/* 0x087c */ std %f0,[%sp+96]
+/* 0x0880 */ ldx [%sp+128],%g5
+/* 0x0884 */ ldx [%sp+136],%o7
+/* 0x0888 */ ldx [%sp+104],%g3
+/* 0x088c */ add %g5,%o7,%o0
+/* 0x0890 */ or %g0,1,%g5
+/* 0x0894 */ ldx [%sp+112],%o7
+/* 0x0898 */ add %o4,%o0,%o4
+/* 0x089c */ ldx [%sp+96],%o5
+/* 0x08a0 */ st %o7,[%o3]
+/* 0x08a4 */ or %g0,%g3,%o7
+ .L900000634:
+/* 0x08a8 */ ldd [%g2+16],%f0
+/* 0x08ac */ add %g5,1,%g5
+/* 0x08b0 */ add %g4,4,%g4
+/* 0x08b4 */ cmp %g5,%g1
+/* 0x08b8 */ add %g2,16,%g2
+/* 0x08bc */ fdtox %f0,%f0
+/* 0x08c0 */ std %f0,[%sp+104]
+/* 0x08c4 */ ldd [%g2+8],%f0
+/* 0x08c8 */ fdtox %f0,%f0
+/* 0x08cc */ std %f0,[%sp+96]
+/* 0x08d0 */ and %o5,%o1,%g3
+/* 0x08d4 */ sllx %g3,16,%g3
+/* 0x08d8 */ stx %g3,[%sp+120]
+/* 0x08dc */ and %o7,%o2,%g3
+/* 0x08e0 */ stx %o7,[%sp+128]
+/* 0x08e4 */ ldx [%sp+120],%o7
+/* 0x08e8 */ add %g3,%o7,%g3
+/* 0x08ec */ ldx [%sp+128],%o7
+/* 0x08f0 */ srax %o5,16,%o5
+/* 0x08f4 */ add %g3,%o4,%g3
+/* 0x08f8 */ srax %g3,32,%o4
+/* 0x08fc */ stx %o4,[%sp+112]
+/* 0x0900 */ srax %o7,32,%o4
+/* 0x0904 */ ldx [%sp+112],%o7
+/* 0x0908 */ add %o5,%o7,%o7
+/* 0x090c */ ldx [%sp+96],%o5
+/* 0x0910 */ add %o4,%o7,%o4
+/* 0x0914 */ and %g3,%o2,%g3
+/* 0x0918 */ ldx [%sp+104],%o7
+/* 0x091c */ ble,pt %icc,.L900000634
+/* 0x0920 */ st %g3,[%g4-4]
+ .L900000637:
+/* 0x0924 */ ba .L900000651
+/* 0x0928 */ sethi %hi(0xfc00),%g2
+ .L77000287:
+/* 0x092c */ ldd [%g2+16],%f0
+ .L900000650:
+/* 0x0930 */ and %o7,%o2,%o0
+/* 0x0934 */ and %o5,%o1,%g3
+/* 0x0938 */ fdtox %f0,%f0
+/* 0x093c */ add %o4,%o0,%o0
+/* 0x0940 */ std %f0,[%sp+104]
+/* 0x0944 */ add %g5,1,%g5
+/* 0x0948 */ sllx %g3,16,%o4
+/* 0x094c */ ldd [%g2+24],%f2
+/* 0x0950 */ add %g2,16,%g2
+/* 0x0954 */ add %o0,%o4,%o4
+/* 0x0958 */ cmp %g5,%g1
+/* 0x095c */ srax %o5,16,%o0
+/* 0x0960 */ stx %o0,[%sp+112]
+/* 0x0964 */ and %o4,%o2,%g3
+/* 0x0968 */ srax %o4,32,%o5
+/* 0x096c */ fdtox %f2,%f0
+/* 0x0970 */ std %f0,[%sp+96]
+/* 0x0974 */ srax %o7,32,%o4
+/* 0x0978 */ ldx [%sp+112],%o7
+/* 0x097c */ add %o7,%o5,%o7
+/* 0x0980 */ ldx [%sp+104],%o5
+/* 0x0984 */ add %o4,%o7,%o4
+/* 0x0988 */ ldx [%sp+96],%o0
+/* 0x098c */ st %g3,[%g4]
+/* 0x0990 */ or %g0,%o5,%o7
+/* 0x0994 */ add %g4,4,%g4
+/* 0x0998 */ or %g0,%o0,%o5
+/* 0x099c */ ble,a,pt %icc,.L900000650
+/* 0x09a0 */ ldd [%g2+16],%f0
+ .L77000236:
+/* 0x09a4 */ sethi %hi(0xfc00),%g2
+ .L900000651:
+/* 0x09a8 */ or %g0,-1,%o0
+/* 0x09ac */ add %g2,1023,%g2
+/* 0x09b0 */ ld [%fp+88],%o1
+/* 0x09b4 */ srl %o0,0,%g3
+/* 0x09b8 */ and %o5,%g2,%g2
+/* 0x09bc */ and %o7,%g3,%g4
+
+! 317 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x09c0 317 */ or %g0,-1,%o5
+/* 0x09c4 311 */ sllx %g2,16,%g2
+/* 0x09c8 */ add %o4,%g4,%g4
+/* 0x09cc */ add %g4,%g2,%g2
+/* 0x09d0 */ sll %g5,2,%g4
+/* 0x09d4 */ and %g2,%g3,%g2
+/* 0x09d8 */ st %g2,[%o3+%g4]
+/* 0x09dc 317 */ sll %i0,2,%g2
+/* 0x09e0 */ ld [%o3+%g2],%g2
+/* 0x09e4 */ cmp %g2,0
+/* 0x09e8 */ bleu,pn %icc,.L77000241
+/* 0x09ec */ or %g0,%o1,%o2
+/* 0x09f0 */ ba .L900000649
+/* 0x09f4 */ cmp %o5,0
+ .L77000241:
+/* 0x09f8 */ sub %i0,1,%o5
+/* 0x09fc */ sll %o5,2,%g2
+/* 0x0a00 */ cmp %o5,0
+/* 0x0a04 */ bl,pt %icc,.L900000649
+/* 0x0a08 */ cmp %o5,0
+/* 0x0a0c */ add %o1,%g2,%o1
+/* 0x0a10 */ add %o3,%g2,%o4
+/* 0x0a14 */ ld [%o1],%g2
+ .L900000648:
+/* 0x0a18 */ ld [%o4],%g3
+/* 0x0a1c */ sub %o5,1,%o0
+/* 0x0a20 */ sub %o1,4,%o1
+/* 0x0a24 */ sub %o4,4,%o4
+/* 0x0a28 */ cmp %g3,%g2
+/* 0x0a2c */ bne,pn %icc,.L77000244
+/* 0x0a30 */ nop
+/* 0x0a34 0 */ or %g0,%o0,%o5
+/* 0x0a38 317 */ cmp %o0,0
+/* 0x0a3c */ bge,a,pt %icc,.L900000648
+/* 0x0a40 */ ld [%o1],%g2
+ .L77000244:
+/* 0x0a44 */ cmp %o5,0
+ .L900000649:
+/* 0x0a48 */ bl,pn %icc,.L77000288
+/* 0x0a4c */ sll %o5,2,%g2
+/* 0x0a50 */ ld [%o2+%g2],%g3
+/* 0x0a54 */ ld [%o3+%g2],%g2
+/* 0x0a58 */ cmp %g2,%g3
+/* 0x0a5c */ bleu,pt %icc,.L77000224
+/* 0x0a60 */ nop
+ .L77000288:
+/* 0x0a64 */ cmp %i0,0
+/* 0x0a68 */ ble,pt %icc,.L77000224
+/* 0x0a6c */ nop
+/* 0x0a70 317 */ sub %i0,1,%o7
+/* 0x0a74 */ or %g0,-1,%g2
+/* 0x0a78 */ srl %g2,0,%o4
+/* 0x0a7c */ add %o7,1,%o0
+/* 0x0a80 315 */ or %g0,0,%o5
+/* 0x0a84 */ or %g0,0,%g1
+/* 0x0a88 */ cmp %o0,3
+/* 0x0a8c */ bl,pn %icc,.L77000289
+/* 0x0a90 */ add %o3,8,%o1
+/* 0x0a94 */ add %o2,4,%o0
+/* 0x0a98 */ ld [%o1-8],%g2
+/* 0x0a9c 0 */ or %g0,%o1,%o3
+/* 0x0aa0 315 */ ld [%o0-4],%g3
+/* 0x0aa4 0 */ or %g0,%o0,%o2
+/* 0x0aa8 315 */ or %g0,2,%g1
+/* 0x0aac */ ld [%o3-4],%o0
+/* 0x0ab0 */ sub %g2,%g3,%g2
+/* 0x0ab4 */ or %g0,%g2,%o5
+/* 0x0ab8 */ and %g2,%o4,%g2
+/* 0x0abc */ st %g2,[%o3-8]
+/* 0x0ac0 */ srax %o5,32,%o5
+ .L900000638:
+/* 0x0ac4 */ ld [%o2],%g2
+/* 0x0ac8 */ add %g1,1,%g1
+/* 0x0acc */ add %o2,4,%o2
+/* 0x0ad0 */ cmp %g1,%o7
+/* 0x0ad4 */ add %o3,4,%o3
+/* 0x0ad8 */ sub %o0,%g2,%o0
+/* 0x0adc */ add %o0,%o5,%o5
+/* 0x0ae0 */ and %o5,%o4,%g2
+/* 0x0ae4 */ ld [%o3-4],%o0
+/* 0x0ae8 */ st %g2,[%o3-8]
+/* 0x0aec */ ble,pt %icc,.L900000638
+/* 0x0af0 */ srax %o5,32,%o5
+ .L900000641:
+/* 0x0af4 */ ld [%o2],%o1
+/* 0x0af8 */ sub %o0,%o1,%o0
+/* 0x0afc */ add %o0,%o5,%o0
+/* 0x0b00 */ and %o0,%o4,%o1
+/* 0x0b04 */ st %o1,[%o3-4]
+/* 0x0b08 */ ret ! Result =
+/* 0x0b0c */ restore %g0,%g0,%g0
+ .L77000289:
+/* 0x0b10 */ ld [%o3],%o0
+ .L900000647:
+/* 0x0b14 */ ld [%o2],%o1
+/* 0x0b18 */ add %o5,%o0,%o0
+/* 0x0b1c */ add %g1,1,%g1
+/* 0x0b20 */ add %o2,4,%o2
+/* 0x0b24 */ cmp %g1,%o7
+/* 0x0b28 */ sub %o0,%o1,%o0
+/* 0x0b2c */ and %o0,%o4,%o1
+/* 0x0b30 */ st %o1,[%o3]
+/* 0x0b34 */ add %o3,4,%o3
+/* 0x0b38 */ srax %o0,32,%o5
+/* 0x0b3c */ ble,a,pt %icc,.L900000647
+/* 0x0b40 */ ld [%o3],%o0
+ .L77000224:
+/* 0x0b44 */ ret ! Result =
+/* 0x0b48 */ restore %g0,%g0,%g0
+/* 0x0b4c 0 */ .type mont_mulf_noconv,2
+/* 0x0b4c */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.il b/security/nss/lib/freebl/mpi/montmulfv9.il
new file mode 100644
index 0000000000..006f47431c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.il
@@ -0,0 +1,93 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+!
+ .inline upper32,8
+ fdtox %f0,%f10
+ fitod %f10,%f0
+ .end
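+! fdtox truncates the argument to a 64-bit integer in the %f10:%f11
+! pair; fitod then converts only %f10, the high word of that pair,
+! back to double, yielding the top 32 bits (floor(x/2^32) for the
+! non-negative values used here).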
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+!
+ .inline lower32,8
+ fdtox %f0,%f10
+ fmovs %f2,%f10
+ fxtod %f10,%f0
+ .end
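+! After fdtox, fmovs overwrites %f10, the high word of the 64-bit
+! integer, with the high word of the Zero argument; fxtod then
+! converts the remaining low 32 bits back to an exact double.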
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+ .inline mod,12
+ fmuld %f0,%f2,%f2
+ fdtox %f2,%f2
+ fxtod %f2,%f2
+ fmuld %f2,%f4,%f2
+ fsubd %f0,%f2,%f0
+ .end
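+! Computes x - m*trunc(x*(1/m)).  The fdtox/fxtod pair rounds the
+! quotient toward zero, so for the non-negative x and power-of-two
+! m (2^16) used by the callers this is exactly x mod m.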
+
+
+!
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */
+! float * /*source - should be unsigned int*
+! converted to float* */);
+!
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22
+
+ fmovd %f22,%f6
+ ld [%o5],%f7
+ fmovd %f22,%f10
+ ld [%o5+4],%f11
+ fmovd %f22,%f14
+ ld [%o5+8],%f15
+ fmovd %f22,%f18
+ ld [%o5+12],%f19
+ fxtod %f6,%f6
+ std %f6,[%o4]
+ fxtod %f10,%f10
+ std %f10,[%o4+8]
+ fxtod %f14,%f14
+ std %f14,[%o4+16]
+ fxtod %f18,%f18
+ std %f18,[%o4+24]
+ fmuld %f2,%f6,%f8
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8
+ std %f8,[%o3+8]
+ fxtod %f12,%f12
+ std %f12,[%o3+24]
+ fxtod %f16,%f16
+ std %f16,[%o3+40]
+ fxtod %f20,%f20
+ std %f20,[%o3+56]
+ fmuld %f8,%f4,%f8
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8
+ std %f8,[%o3]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48]
+ .end
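+! Each of the four source words is loaded into the low half of a
+! register pair primed with Zero, so fxtod sees the zero-extended
+! 32-bit value and produces an exact double, stored to result32.
+! Each value v is then split into 16-bit digits for result16:
+! hi = trunc(v*2^-16) goes to the odd slot, v - hi*2^16 to the even.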
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.s b/security/nss/lib/freebl/mpi/montmulfv9.s
new file mode 100644
index 0000000000..560e47f7bc
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.s
@@ -0,0 +1,2346 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".rodata",#alloc
+ .global TwoTo16
+ .align 8
+!
+! CONSTANT POOL
+!
+ .global TwoTo16
+TwoTo16:
+ .word 1089470464
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+ .global TwoToMinus16
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus16
+TwoToMinus16:
+ .word 1055916032
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+ .global Zero
+!
+! CONSTANT POOL
+!
+ .global Zero
+Zero:
+ .word 0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+ .global TwoTo32
+!
+! CONSTANT POOL
+!
+ .global TwoTo32
+TwoTo32:
+ .word 1106247680
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+ .global TwoToMinus32
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus32
+TwoToMinus32:
+ .word 1039138816
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
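+! The decimal .word values above are the big-endian high halves of
+! the IEEE-754 doubles: 1089470464 = 0x40F00000 for 65536.0,
+! 1055916032 = 0x3EF00000 for 2^-16, 1106247680 = 0x41F00000 for
+! 2^32, and 1039138816 = 0x3DF00000 for 2^-32.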
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .register %g3,#scratch
+/* 000000 */ .register %g2,#scratch
+/* 000000 0 */ .align 8
+!
+! SUBROUTINE conv_d16_to_i32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32:
+/* 000000 */ save %sp,-208,%sp
+! FILE montmulf.c
+
+! 1 !/*
+! 2 ! * The contents of this file are subject to the Mozilla Public
+! 3 ! * License Version 1.1 (the "License"); you may not use this file
+! 4 ! * except in compliance with the License. You may obtain a copy of
+! 5 ! * the License at http://www.mozilla.org/MPL/
+! 6 ! *
+! 7 ! * Software distributed under the License is distributed on an "AS
+! 8 ! * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+! 9 ! * implied. See the License for the specific language governing
+! 10 ! * rights and limitations under the License.
+! 11 ! *
+! 12 ! * The Original Code is SPARC optimized Montgomery multiply functions.
+! 13 ! *
+! 14 ! * The Initial Developer of the Original Code is Sun Microsystems Inc.
+! 15 ! * Portions created by Sun Microsystems Inc. are
+! 16 ! * Copyright (C) 1999-2000 Sun Microsystems Inc. All Rights Reserved.
+! 17 ! *
+! 18 ! * Contributor(s):
+! 19 ! * Netscape Communications Corporation
+! 20 ! *
+! 21 ! * Alternatively, the contents of this file may be used under the
+! 22 ! * terms of the GNU General Public License Version 2 or later (the
+! 23 ! * "GPL"), in which case the provisions of the GPL are applicable
+! 24 ! * instead of those above. If you wish to allow use of your
+! 25 ! * version of this file only under the terms of the GPL and not to
+! 26 ! * allow others to use your version of this file under the MPL,
+! 27 ! * indicate your decision by deleting the provisions above and
+! 28 ! * replace them with the notice and other provisions required by
+! 29 ! * the GPL. If you do not delete the provisions above, a recipient
+! 30 ! * may use your version of this file under either the MPL or the
+! 31 ! * GPL.
+! 34 ! */
+! 36 !#define RF_INLINE_MACROS
+! 38 !static const double TwoTo16=65536.0;
+! 39 !static const double TwoToMinus16=1.0/65536.0;
+! 40 !static const double Zero=0.0;
+! 41 !static const double TwoTo32=65536.0*65536.0;
+! 42 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+! 44 !#ifdef RF_INLINE_MACROS
+! 46 !double upper32(double);
+! 47 !double lower32(double, double);
+! 48 !double mod(double, double, double);
+! 50 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+! 51 ! const double * /* 2^16*/,
+! 52 ! const double * /* 0 */,
+! 53 ! double * /*result16*/,
+! 54 ! double * /* result32 */,
+! 55 ! float * /*source - should be unsigned int*
+! 56 ! converted to float* */);
+! 58 !#else
+! 60 !static double upper32(double x)
+! 61 !{
+! 62 ! return floor(x*TwoToMinus32);
+! 63 !}
+! 65 !static double lower32(double x, double y)
+! 66 !{
+! 67 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 68 !}
+! 70 !static double mod(double x, double oneoverm, double m)
+! 71 !{
+! 72 ! return x-m*floor(x*oneoverm);
+! 73 !}
+! 75 !#endif
+! 78 !static void cleanup(double *dt, int from, int tlen)
+! 79 !{
+! 80 ! int i;
+! 81 ! double tmp,tmp1,x,x1;
+! 83 ! tmp=tmp1=Zero;
+! 84 ! /* original code **
+! 85 ! for(i=2*from;i<2*tlen-2;i++)
+! 86 ! {
+! 87 ! x=dt[i];
+! 88 ! dt[i]=lower32(x,Zero)+tmp1;
+! 89 ! tmp1=tmp;
+! 90 ! tmp=upper32(x);
+! 91 ! }
+! 92 ! dt[tlen-2]+=tmp1;
+! 93 ! dt[tlen-1]+=tmp;
+! 94 ! **end original code ***/
+! 95 ! /* new code ***/
+! 96 ! for(i=2*from;i<2*tlen;i+=2)
+! 97 ! {
+! 98 ! x=dt[i];
+! 99 ! x1=dt[i+1];
+! 100 ! dt[i]=lower32(x,Zero)+tmp;
+! 101 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 102 ! tmp=upper32(x);
+! 103 ! tmp1=upper32(x1);
+! 104 ! }
+! 105 ! /** end new code **/
+! 106 !}
+! 109 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 110 !{
+! 111 !int i;
+! 112 !long long t, t1, a, b, c, d;
+! 114 ! t1=0;
+! 115 ! a=(long long)d16[0];
+
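+! In outline: each d16[] entry is truncated to a 64-bit integer with
+! fdtox; consecutive pairs (a at bit 0, b at bit 16) are then folded
+! together with 64-bit carry propagation, t1 collecting the low 32
+! bits stored to i32[i] and t carrying the excess into the next word.
+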
+/* 0x0004 115 */ ldd [%i1],%f2
+
+! 116 ! b=(long long)d16[1];
+! 117 ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008 117 */ sub %i3,1,%o1
+/* 0x000c 110 */ or %g0,%i0,%g1
+/* 0x0010 116 */ ldd [%i1+8],%f4
+/* 0x0014 117 */ cmp %o1,0
+/* 0x0018 114 */ or %g0,0,%g5
+/* 0x001c 115 */ fdtox %f2,%f2
+/* 0x0020 */ std %f2,[%sp+2247]
+/* 0x0024 117 */ or %g0,0,%o0
+/* 0x0028 116 */ fdtox %f4,%f2
+/* 0x002c */ std %f2,[%sp+2239]
+/* 0x0030 110 */ sub %o1,1,%o7
+/* 0x0034 */ or %g0,%i1,%o4
+/* 0x0038 */ sethi %hi(0xfc00),%o3
+/* 0x003c */ or %g0,-1,%o1
+/* 0x0040 */ or %g0,2,%i1
+/* 0x0044 */ srl %o1,0,%g3
+/* 0x0048 */ or %g0,%o4,%g4
+/* 0x004c 116 */ ldx [%sp+2239],%i2
+/* 0x0050 */ add %o3,1023,%o5
+/* 0x0054 117 */ sub %o7,1,%o2
+/* 0x0058 115 */ ldx [%sp+2247],%i3
+/* 0x005c 117 */ ble,pt %icc,.L900000113
+/* 0x0060 */ sethi %hi(0xfc00),%g2
+/* 0x0064 */ add %o7,1,%g2
+
+! 118 ! {
+! 119 ! c=(long long)d16[2*i+2];
+! 120 ! t1+=a&0xffffffff;
+! 121 ! t=(a>>32);
+! 122 ! d=(long long)d16[2*i+3];
+! 123 ! t1+=(b&0xffff)<<16;
+
+/* 0x0068 123 */ and %i2,%o5,%i4
+/* 0x006c */ sllx %i4,16,%o1
+/* 0x0070 117 */ cmp %g2,6
+/* 0x0074 */ bl,pn %icc,.L77000134
+/* 0x0078 */ or %g0,3,%i0
+/* 0x007c 119 */ ldd [%o4+16],%f0
+/* 0x0080 120 */ and %i3,%g3,%o3
+
+! 124 ! t+=(b>>16)+(t1>>32);
+
+/* 0x0084 124 */ srax %i2,16,%i5
+/* 0x0088 117 */ add %o3,%o1,%i4
+/* 0x008c 121 */ srax %i3,32,%i3
+/* 0x0090 119 */ fdtox %f0,%f0
+/* 0x0094 */ std %f0,[%sp+2231]
+
+! 125 ! i32[i]=t1&0xffffffff;
+
+/* 0x0098 125 */ and %i4,%g3,%l0
+/* 0x009c 117 */ or %g0,72,%o3
+/* 0x00a0 122 */ ldd [%g4+24],%f0
+/* 0x00a4 117 */ or %g0,64,%o4
+/* 0x00a8 */ or %g0,4,%o1
+
+! 126 ! t1=t;
+! 127 ! a=c;
+! 128 ! b=d;
+
+/* 0x00ac 128 */ or %g0,5,%i0
+/* 0x00b0 */ or %g0,4,%i1
+/* 0x00b4 119 */ ldx [%sp+2231],%g2
+/* 0x00b8 122 */ fdtox %f0,%f0
+/* 0x00bc 128 */ or %g0,4,%o0
+/* 0x00c0 122 */ std %f0,[%sp+2223]
+/* 0x00c4 */ ldd [%g4+40],%f2
+/* 0x00c8 120 */ and %g2,%g3,%i2
+/* 0x00cc 119 */ ldd [%g4+32],%f0
+/* 0x00d0 121 */ srax %g2,32,%g2
+/* 0x00d4 122 */ ldd [%g4+56],%f4
+/* 0x00d8 */ fdtox %f2,%f2
+/* 0x00dc */ ldx [%sp+2223],%g5
+/* 0x00e0 119 */ fdtox %f0,%f0
+/* 0x00e4 125 */ st %l0,[%g1]
+/* 0x00e8 124 */ srax %i4,32,%l0
+/* 0x00ec 122 */ fdtox %f4,%f4
+/* 0x00f0 */ std %f2,[%sp+2223]
+/* 0x00f4 123 */ and %g5,%o5,%i4
+/* 0x00f8 124 */ add %i5,%l0,%i5
+/* 0x00fc 119 */ std %f0,[%sp+2231]
+/* 0x0100 123 */ sllx %i4,16,%i4
+/* 0x0104 124 */ add %i3,%i5,%i3
+/* 0x0108 119 */ ldd [%g4+48],%f2
+/* 0x010c 124 */ srax %g5,16,%g5
+/* 0x0110 117 */ add %i2,%i4,%i2
+/* 0x0114 122 */ ldd [%g4+72],%f0
+/* 0x0118 117 */ add %i2,%i3,%i4
+/* 0x011c 124 */ srax %i4,32,%i5
+/* 0x0120 119 */ fdtox %f2,%f2
+/* 0x0124 125 */ and %i4,%g3,%i4
+/* 0x0128 122 */ ldx [%sp+2223],%i2
+/* 0x012c 124 */ add %g5,%i5,%g5
+/* 0x0130 119 */ ldx [%sp+2231],%i3
+/* 0x0134 124 */ add %g2,%g5,%g5
+/* 0x0138 119 */ std %f2,[%sp+2231]
+/* 0x013c 122 */ std %f4,[%sp+2223]
+/* 0x0140 119 */ ldd [%g4+64],%f2
+/* 0x0144 125 */ st %i4,[%g1+4]
+ .L900000108:
+/* 0x0148 122 */ ldx [%sp+2223],%i4
+/* 0x014c 128 */ add %o0,2,%o0
+/* 0x0150 */ add %i0,4,%i0
+/* 0x0154 119 */ ldx [%sp+2231],%l0
+/* 0x0158 117 */ add %o3,16,%o3
+/* 0x015c 123 */ and %i2,%o5,%g2
+/* 0x0160 */ sllx %g2,16,%i5
+/* 0x0164 120 */ and %i3,%g3,%g2
+/* 0x0168 122 */ ldd [%g4+%o3],%f4
+/* 0x016c */ fdtox %f0,%f0
+/* 0x0170 */ std %f0,[%sp+2223]
+/* 0x0174 124 */ srax %i2,16,%i2
+/* 0x0178 117 */ add %g2,%i5,%g2
+/* 0x017c 119 */ fdtox %f2,%f0
+/* 0x0180 117 */ add %o4,16,%o4
+/* 0x0184 119 */ std %f0,[%sp+2231]
+/* 0x0188 117 */ add %g2,%g5,%g2
+/* 0x018c 119 */ ldd [%g4+%o4],%f2
+/* 0x0190 124 */ srax %g2,32,%i5
+/* 0x0194 128 */ cmp %o0,%o2
+/* 0x0198 121 */ srax %i3,32,%g5
+/* 0x019c 124 */ add %i2,%i5,%i2
+/* 0x01a0 */ add %g5,%i2,%i5
+/* 0x01a4 117 */ add %o1,4,%o1
+/* 0x01a8 125 */ and %g2,%g3,%g2
+/* 0x01ac 127 */ or %g0,%l0,%g5
+/* 0x01b0 125 */ st %g2,[%g1+%o1]
+/* 0x01b4 128 */ add %i1,4,%i1
+/* 0x01b8 122 */ ldx [%sp+2223],%i2
+/* 0x01bc 119 */ ldx [%sp+2231],%i3
+/* 0x01c0 117 */ add %o3,16,%o3
+/* 0x01c4 123 */ and %i4,%o5,%g2
+/* 0x01c8 */ sllx %g2,16,%l0
+/* 0x01cc 120 */ and %g5,%g3,%g2
+/* 0x01d0 122 */ ldd [%g4+%o3],%f0
+/* 0x01d4 */ fdtox %f4,%f4
+/* 0x01d8 */ std %f4,[%sp+2223]
+/* 0x01dc 124 */ srax %i4,16,%i4
+/* 0x01e0 117 */ add %g2,%l0,%g2
+/* 0x01e4 119 */ fdtox %f2,%f2
+/* 0x01e8 117 */ add %o4,16,%o4
+/* 0x01ec 119 */ std %f2,[%sp+2231]
+/* 0x01f0 117 */ add %g2,%i5,%g2
+/* 0x01f4 119 */ ldd [%g4+%o4],%f2
+/* 0x01f8 124 */ srax %g2,32,%i5
+/* 0x01fc 121 */ srax %g5,32,%g5
+/* 0x0200 124 */ add %i4,%i5,%i4
+/* 0x0204 */ add %g5,%i4,%g5
+/* 0x0208 117 */ add %o1,4,%o1
+/* 0x020c 125 */ and %g2,%g3,%g2
+/* 0x0210 128 */ ble,pt %icc,.L900000108
+/* 0x0214 */ st %g2,[%g1+%o1]
+ .L900000111:
+/* 0x0218 122 */ ldx [%sp+2223],%o2
+/* 0x021c 123 */ and %i2,%o5,%i4
+/* 0x0220 120 */ and %i3,%g3,%g2
+/* 0x0224 123 */ sllx %i4,16,%i4
+/* 0x0228 119 */ ldx [%sp+2231],%i5
+/* 0x022c 128 */ cmp %o0,%o7
+/* 0x0230 124 */ srax %i2,16,%i2
+/* 0x0234 117 */ add %g2,%i4,%g2
+/* 0x0238 122 */ fdtox %f0,%f4
+/* 0x023c */ std %f4,[%sp+2223]
+/* 0x0240 117 */ add %g2,%g5,%g5
+/* 0x0244 123 */ and %o2,%o5,%l0
+/* 0x0248 124 */ srax %g5,32,%l1
+/* 0x024c 120 */ and %i5,%g3,%i4
+/* 0x0250 119 */ fdtox %f2,%f0
+/* 0x0254 121 */ srax %i3,32,%g2
+/* 0x0258 119 */ std %f0,[%sp+2231]
+/* 0x025c 124 */ add %i2,%l1,%i2
+/* 0x0260 123 */ sllx %l0,16,%i3
+/* 0x0264 124 */ add %g2,%i2,%i2
+/* 0x0268 */ srax %o2,16,%o2
+/* 0x026c 117 */ add %o1,4,%g2
+/* 0x0270 */ add %i4,%i3,%o1
+/* 0x0274 125 */ and %g5,%g3,%g5
+/* 0x0278 */ st %g5,[%g1+%g2]
+/* 0x027c 119 */ ldx [%sp+2231],%i3
+/* 0x0280 117 */ add %o1,%i2,%o1
+/* 0x0284 */ add %g2,4,%g2
+/* 0x0288 124 */ srax %o1,32,%i4
+/* 0x028c 122 */ ldx [%sp+2223],%i2
+/* 0x0290 125 */ and %o1,%g3,%g5
+/* 0x0294 121 */ srax %i5,32,%o1
+/* 0x0298 124 */ add %o2,%i4,%o2
+/* 0x029c 125 */ st %g5,[%g1+%g2]
+/* 0x02a0 128 */ bg,pn %icc,.L77000127
+/* 0x02a4 */ add %o1,%o2,%g5
+/* 0x02a8 */ add %i0,6,%i0
+/* 0x02ac */ add %i1,6,%i1
+ .L77000134:
+/* 0x02b0 119 */ sra %i1,0,%o2
+ .L900000112:
+/* 0x02b4 119 */ sllx %o2,3,%o3
+/* 0x02b8 120 */ and %i3,%g3,%o1
+/* 0x02bc 119 */ ldd [%g4+%o3],%f0
+/* 0x02c0 122 */ sra %i0,0,%o3
+/* 0x02c4 123 */ and %i2,%o5,%o2
+/* 0x02c8 122 */ sllx %o3,3,%o3
+/* 0x02cc 120 */ add %g5,%o1,%o1
+/* 0x02d0 119 */ fdtox %f0,%f0
+/* 0x02d4 */ std %f0,[%sp+2231]
+/* 0x02d8 123 */ sllx %o2,16,%o2
+/* 0x02dc */ add %o1,%o2,%o2
+/* 0x02e0 128 */ add %i1,2,%i1
+/* 0x02e4 122 */ ldd [%g4+%o3],%f0
+/* 0x02e8 124 */ srax %o2,32,%g2
+/* 0x02ec 125 */ and %o2,%g3,%o3
+/* 0x02f0 124 */ srax %i2,16,%o1
+/* 0x02f4 128 */ add %i0,2,%i0
+/* 0x02f8 122 */ fdtox %f0,%f0
+/* 0x02fc */ std %f0,[%sp+2223]
+/* 0x0300 125 */ sra %o0,0,%o2
+/* 0x0304 */ sllx %o2,2,%o2
+/* 0x0308 124 */ add %o1,%g2,%g5
+/* 0x030c 121 */ srax %i3,32,%g2
+/* 0x0310 128 */ add %o0,1,%o0
+/* 0x0314 124 */ add %g2,%g5,%g5
+/* 0x0318 128 */ cmp %o0,%o7
+/* 0x031c 119 */ ldx [%sp+2231],%o4
+/* 0x0320 122 */ ldx [%sp+2223],%i2
+/* 0x0324 125 */ st %o3,[%g1+%o2]
+/* 0x0328 127 */ or %g0,%o4,%i3
+/* 0x032c 128 */ ble,pt %icc,.L900000112
+/* 0x0330 */ sra %i1,0,%o2
+ .L77000127:
+
+! 129 ! }
+! 130 ! t1+=a&0xffffffff;
+! 131 ! t=(a>>32);
+! 132 ! t1+=(b&0xffff)<<16;
+! 133 ! i32[i]=t1&0xffffffff;
+
+/* 0x0334 133 */ sethi %hi(0xfc00),%g2
+ .L900000113:
+/* 0x0338 133 */ or %g0,-1,%g3
+/* 0x033c */ add %g2,1023,%g2
+/* 0x0340 */ srl %g3,0,%g3
+/* 0x0344 */ and %i2,%g2,%g2
+/* 0x0348 */ and %i3,%g3,%g4
+/* 0x034c */ sllx %g2,16,%g2
+/* 0x0350 */ add %g5,%g4,%g4
+/* 0x0354 */ sra %o0,0,%g5
+/* 0x0358 */ add %g4,%g2,%g4
+/* 0x035c */ sllx %g5,2,%g2
+/* 0x0360 */ and %g4,%g3,%g3
+/* 0x0364 */ st %g3,[%g1+%g2]
+/* 0x0368 */ ret ! Result =
+/* 0x036c */ restore %g0,%g0,%g0
+/* 0x0370 0 */ .type conv_d16_to_i32,2
+/* 0x0370 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000201:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32:
+/* 000000 */ or %g0,%o7,%g3
+
+! 135 !}
+! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 138 !{
+! 139 !int i;
+! 141 !#pragma pipeloop(0)
+! 142 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
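+! The conversion uses the 2^52 bias trick instead of fitod: the
+! constant pool word 1127219200 is 0x43300000, the high half of the
+! double 2^52.  Loading an unsigned word n into the low half of that
+! pattern yields 2^52 + n exactly, and fsubd by 2^52 leaves (double)n,
+! which stays correct when n has its top bit set (fitod, a signed
+! conversion, would treat it as negative).
+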
+/* 0x0004 142 */ cmp %o2,0
+ .L900000210:
+/* 0x0008 */ call .+8
+/* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010 142 */ or %g0,0,%o3
+/* 0x0014 138 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018 142 */ sub %o2,1,%o4
+/* 0x001c 138 */ add %g4,%o7,%g1
+/* 0x0020 142 */ ble,pt %icc,.L77000140
+/* 0x0024 */ or %g0,%g3,%o7
+/* 0x0028 */ sethi %hi(.L_const_seg_900000201),%g3
+/* 0x002c */ cmp %o2,12
+/* 0x0030 */ add %g3,%lo(.L_const_seg_900000201),%g2
+/* 0x0034 */ or %g0,%o1,%g5
+/* 0x0038 */ ldx [%g1+%g2],%g4
+/* 0x003c */ or %g0,0,%g1
+/* 0x0040 */ or %g0,24,%g2
+/* 0x0044 */ bl,pn %icc,.L77000144
+/* 0x0048 */ or %g0,0,%g3
+/* 0x004c */ ld [%o1],%f13
+/* 0x0050 */ or %g0,7,%o3
+/* 0x0054 */ ldd [%g4],%f8
+/* 0x0058 */ sub %o2,5,%g3
+/* 0x005c */ or %g0,8,%g1
+/* 0x0060 */ ld [%o1+4],%f11
+/* 0x0064 */ ld [%o1+8],%f7
+/* 0x0068 */ fmovs %f8,%f12
+/* 0x006c */ ld [%o1+12],%f5
+/* 0x0070 */ fmovs %f8,%f10
+/* 0x0074 */ ld [%o1+16],%f3
+/* 0x0078 */ fmovs %f8,%f6
+/* 0x007c */ ld [%o1+20],%f1
+/* 0x0080 */ fsubd %f12,%f8,%f12
+/* 0x0084 */ std %f12,[%o0]
+/* 0x0088 */ fsubd %f10,%f8,%f10
+/* 0x008c */ std %f10,[%o0+8]
+ .L900000205:
+/* 0x0090 */ ld [%o1+%g2],%f11
+/* 0x0094 */ add %g1,8,%g1
+/* 0x0098 */ add %o3,5,%o3
+/* 0x009c */ fsubd %f6,%f8,%f6
+/* 0x00a0 */ add %g2,4,%g2
+/* 0x00a4 */ std %f6,[%o0+%g1]
+/* 0x00a8 */ cmp %o3,%g3
+/* 0x00ac */ fmovs %f8,%f4
+/* 0x00b0 */ ld [%o1+%g2],%f7
+/* 0x00b4 */ fsubd %f4,%f8,%f12
+/* 0x00b8 */ add %g1,8,%g1
+/* 0x00bc */ add %g2,4,%g2
+/* 0x00c0 */ fmovs %f8,%f2
+/* 0x00c4 */ std %f12,[%o0+%g1]
+/* 0x00c8 */ ld [%o1+%g2],%f5
+/* 0x00cc */ fsubd %f2,%f8,%f12
+/* 0x00d0 */ add %g1,8,%g1
+/* 0x00d4 */ add %g2,4,%g2
+/* 0x00d8 */ fmovs %f8,%f0
+/* 0x00dc */ std %f12,[%o0+%g1]
+/* 0x00e0 */ ld [%o1+%g2],%f3
+/* 0x00e4 */ fsubd %f0,%f8,%f12
+/* 0x00e8 */ add %g1,8,%g1
+/* 0x00ec */ add %g2,4,%g2
+/* 0x00f0 */ fmovs %f8,%f10
+/* 0x00f4 */ std %f12,[%o0+%g1]
+/* 0x00f8 */ ld [%o1+%g2],%f1
+/* 0x00fc */ fsubd %f10,%f8,%f10
+/* 0x0100 */ add %g1,8,%g1
+/* 0x0104 */ add %g2,4,%g2
+/* 0x0108 */ std %f10,[%o0+%g1]
+/* 0x010c */ ble,pt %icc,.L900000205
+/* 0x0110 */ fmovs %f8,%f6
+ .L900000208:
+/* 0x0114 */ fmovs %f8,%f4
+/* 0x0118 */ ld [%o1+%g2],%f11
+/* 0x011c */ add %g1,8,%g3
+/* 0x0120 */ fmovs %f8,%f2
+/* 0x0124 */ add %g1,16,%g1
+/* 0x0128 */ cmp %o3,%o4
+/* 0x012c */ fmovs %f8,%f0
+/* 0x0130 */ add %g1,8,%o1
+/* 0x0134 */ add %g1,16,%o2
+/* 0x0138 */ fmovs %f8,%f10
+/* 0x013c */ add %g1,24,%g2
+/* 0x0140 */ fsubd %f6,%f8,%f6
+/* 0x0144 */ std %f6,[%o0+%g3]
+/* 0x0148 */ fsubd %f4,%f8,%f4
+/* 0x014c */ std %f4,[%o0+%g1]
+/* 0x0150 */ sra %o3,0,%g1
+/* 0x0154 */ fsubd %f2,%f8,%f2
+/* 0x0158 */ std %f2,[%o0+%o1]
+/* 0x015c */ sllx %g1,2,%g3
+/* 0x0160 */ fsubd %f0,%f8,%f0
+/* 0x0164 */ std %f0,[%o0+%o2]
+/* 0x0168 */ fsubd %f10,%f8,%f0
+/* 0x016c */ bg,pn %icc,.L77000140
+/* 0x0170 */ std %f0,[%o0+%g2]
+ .L77000144:
+/* 0x0174 */ ldd [%g4],%f8
+ .L900000211:
+/* 0x0178 */ ld [%g5+%g3],%f13
+/* 0x017c */ sllx %g1,3,%g2
+/* 0x0180 */ add %o3,1,%o3
+/* 0x0184 */ sra %o3,0,%g1
+/* 0x0188 */ cmp %o3,%o4
+/* 0x018c */ fmovs %f8,%f12
+/* 0x0190 */ sllx %g1,2,%g3
+/* 0x0194 */ fsubd %f12,%f8,%f0
+/* 0x0198 */ std %f0,[%o0+%g2]
+/* 0x019c */ ble,a,pt %icc,.L900000211
+/* 0x01a0 */ ldd [%g4],%f8
+ .L77000140:
+/* 0x01a4 */ retl ! Result =
+/* 0x01a8 */ nop
+/* 0x01ac 0 */ .type conv_i32_to_d32,2
+/* 0x01ac */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16:
+/* 000000 */ save %sp,-192,%sp
+ .L900000310:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+
+! 143 !}
+! 146 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 147 !{
+! 148 !int i;
+! 149 !unsigned int a;
+! 151 !#pragma pipeloop(0)
+! 152 ! for(i=0;i<len;i++)
+
+/* 0x000c 152 */ cmp %i2,0
+/* 0x0010 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014 152 */ ble,pt %icc,.L77000150
+/* 0x0018 */ add %g3,%o7,%o0
+
+! 153 ! {
+! 154 ! a=i32[i];
+! 155 ! d16[2*i]=(double)(a&0xffff);
+! 156 ! d16[2*i+1]=(double)(a>>16);
+
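+! Same 2^52 bias trick, applied twice per word: each i32[i] is split
+! into its low 16 bits (a&0xffff) and high 16 bits (a>>16), each half
+! is rebuilt against the biased constant with fsubd, and the two
+! resulting doubles become the digits d16[2i] and d16[2i+1].
+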
+/* 0x001c 156 */ sethi %hi(.L_const_seg_900000301),%g2
+/* 0x0020 147 */ or %g0,%i2,%o1
+/* 0x0024 152 */ sethi %hi(0xfc00),%g3
+/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%g2
+/* 0x002c 152 */ or %g0,%o1,%g4
+/* 0x0030 156 */ ldx [%o0+%g2],%o5
+/* 0x0034 152 */ add %g3,1023,%g1
+/* 0x0038 147 */ or %g0,%i1,%o7
+/* 0x003c 152 */ or %g0,0,%i2
+/* 0x0040 */ sub %o1,1,%g5
+/* 0x0044 */ or %g0,0,%g3
+/* 0x0048 */ or %g0,1,%g2
+/* 0x004c 154 */ or %g0,0,%o2
+/* 0x0050 */ cmp %g4,6
+/* 0x0054 152 */ bl,pn %icc,.L77000154
+/* 0x0058 */ ldd [%o5],%f0
+/* 0x005c */ sub %o1,2,%o3
+/* 0x0060 */ or %g0,16,%o2
+/* 0x0064 154 */ ld [%i1],%o4
+/* 0x0068 156 */ or %g0,3,%g2
+/* 0x006c */ or %g0,2,%g3
+/* 0x0070 155 */ fmovs %f0,%f2
+/* 0x0074 156 */ or %g0,4,%i2
+/* 0x0078 155 */ and %o4,%g1,%o0
+/* 0x007c */ st %o0,[%sp+2227]
+/* 0x0080 */ fmovs %f0,%f4
+/* 0x0084 156 */ srl %o4,16,%i4
+/* 0x0088 152 */ or %g0,12,%o4
+/* 0x008c */ or %g0,24,%o0
+/* 0x0090 155 */ ld [%sp+2227],%f3
+/* 0x0094 */ fsubd %f2,%f0,%f2
+/* 0x0098 */ std %f2,[%i0]
+/* 0x009c 156 */ st %i4,[%sp+2223]
+/* 0x00a0 154 */ ld [%o7+4],%o1
+/* 0x00a4 156 */ fmovs %f0,%f2
+/* 0x00a8 155 */ and %o1,%g1,%i1
+/* 0x00ac 156 */ ld [%sp+2223],%f3
+/* 0x00b0 */ srl %o1,16,%o1
+/* 0x00b4 */ fsubd %f2,%f0,%f2
+/* 0x00b8 */ std %f2,[%i0+8]
+/* 0x00bc */ st %o1,[%sp+2223]
+/* 0x00c0 155 */ st %i1,[%sp+2227]
+/* 0x00c4 154 */ ld [%o7+8],%o1
+/* 0x00c8 156 */ fmovs %f0,%f2
+/* 0x00cc 155 */ and %o1,%g1,%g4
+/* 0x00d0 */ ld [%sp+2227],%f5
+/* 0x00d4 156 */ srl %o1,16,%o1
+/* 0x00d8 */ ld [%sp+2223],%f3
+/* 0x00dc */ st %o1,[%sp+2223]
+/* 0x00e0 155 */ fsubd %f4,%f0,%f4
+/* 0x00e4 */ st %g4,[%sp+2227]
+/* 0x00e8 156 */ fsubd %f2,%f0,%f2
+/* 0x00ec 154 */ ld [%o7+12],%o1
+/* 0x00f0 155 */ std %f4,[%i0+16]
+/* 0x00f4 156 */ std %f2,[%i0+24]
+ .L900000306:
+/* 0x00f8 155 */ ld [%sp+2227],%f5
+/* 0x00fc 156 */ add %i2,2,%i2
+/* 0x0100 */ add %g2,4,%g2
+/* 0x0104 */ ld [%sp+2223],%f3
+/* 0x0108 */ cmp %i2,%o3
+/* 0x010c */ add %g3,4,%g3
+/* 0x0110 155 */ and %o1,%g1,%g4
+/* 0x0114 156 */ srl %o1,16,%o1
+/* 0x0118 155 */ st %g4,[%sp+2227]
+/* 0x011c 156 */ st %o1,[%sp+2223]
+/* 0x0120 152 */ add %o4,4,%o1
+/* 0x0124 154 */ ld [%o7+%o1],%o4
+/* 0x0128 156 */ fmovs %f0,%f2
+/* 0x012c 155 */ fmovs %f0,%f4
+/* 0x0130 */ fsubd %f4,%f0,%f4
+/* 0x0134 152 */ add %o2,16,%o2
+/* 0x0138 156 */ fsubd %f2,%f0,%f2
+/* 0x013c 155 */ std %f4,[%i0+%o2]
+/* 0x0140 152 */ add %o0,16,%o0
+/* 0x0144 156 */ std %f2,[%i0+%o0]
+/* 0x0148 155 */ ld [%sp+2227],%f5
+/* 0x014c 156 */ ld [%sp+2223],%f3
+/* 0x0150 155 */ and %o4,%g1,%g4
+/* 0x0154 156 */ srl %o4,16,%o4
+/* 0x0158 155 */ st %g4,[%sp+2227]
+/* 0x015c 156 */ st %o4,[%sp+2223]
+/* 0x0160 152 */ add %o1,4,%o4
+/* 0x0164 154 */ ld [%o7+%o4],%o1
+/* 0x0168 156 */ fmovs %f0,%f2
+/* 0x016c 155 */ fmovs %f0,%f4
+/* 0x0170 */ fsubd %f4,%f0,%f4
+/* 0x0174 152 */ add %o2,16,%o2
+/* 0x0178 156 */ fsubd %f2,%f0,%f2
+/* 0x017c 155 */ std %f4,[%i0+%o2]
+/* 0x0180 152 */ add %o0,16,%o0
+/* 0x0184 156 */ ble,pt %icc,.L900000306
+/* 0x0188 */ std %f2,[%i0+%o0]
+ .L900000309:
+/* 0x018c 155 */ ld [%sp+2227],%f5
+/* 0x0190 156 */ fmovs %f0,%f2
+/* 0x0194 */ srl %o1,16,%o3
+/* 0x0198 */ ld [%sp+2223],%f3
+/* 0x019c 155 */ and %o1,%g1,%i1
+/* 0x01a0 152 */ add %o2,16,%g4
+/* 0x01a4 155 */ fmovs %f0,%f4
+/* 0x01a8 */ st %i1,[%sp+2227]
+/* 0x01ac 152 */ add %o0,16,%o2
+/* 0x01b0 156 */ st %o3,[%sp+2223]
+/* 0x01b4 154 */ sra %i2,0,%o3
+/* 0x01b8 152 */ add %g4,16,%o1
+/* 0x01bc 155 */ fsubd %f4,%f0,%f4
+/* 0x01c0 */ std %f4,[%i0+%g4]
+/* 0x01c4 152 */ add %o0,32,%o0
+/* 0x01c8 156 */ fsubd %f2,%f0,%f2
+/* 0x01cc */ std %f2,[%i0+%o2]
+/* 0x01d0 */ sllx %o3,2,%o2
+/* 0x01d4 155 */ ld [%sp+2227],%f5
+/* 0x01d8 156 */ cmp %i2,%g5
+/* 0x01dc */ add %g2,6,%g2
+/* 0x01e0 */ ld [%sp+2223],%f3
+/* 0x01e4 */ add %g3,6,%g3
+/* 0x01e8 155 */ fmovs %f0,%f4
+/* 0x01ec 156 */ fmovs %f0,%f2
+/* 0x01f0 155 */ fsubd %f4,%f0,%f4
+/* 0x01f4 */ std %f4,[%i0+%o1]
+/* 0x01f8 156 */ fsubd %f2,%f0,%f0
+/* 0x01fc */ bg,pn %icc,.L77000150
+/* 0x0200 */ std %f0,[%i0+%o0]
+ .L77000154:
+/* 0x0204 155 */ ldd [%o5],%f0
+ .L900000311:
+/* 0x0208 154 */ ld [%o7+%o2],%o0
+/* 0x020c 155 */ sra %g3,0,%o1
+/* 0x0210 */ fmovs %f0,%f2
+/* 0x0214 */ sllx %o1,3,%o2
+/* 0x0218 156 */ add %i2,1,%i2
+/* 0x021c 155 */ and %o0,%g1,%o1
+/* 0x0220 */ st %o1,[%sp+2227]
+/* 0x0224 156 */ add %g3,2,%g3
+/* 0x0228 */ srl %o0,16,%o1
+/* 0x022c */ cmp %i2,%g5
+/* 0x0230 */ sra %g2,0,%o0
+/* 0x0234 */ add %g2,2,%g2
+/* 0x0238 */ sllx %o0,3,%o0
+/* 0x023c 155 */ ld [%sp+2227],%f3
+/* 0x0240 154 */ sra %i2,0,%o3
+/* 0x0244 155 */ fsubd %f2,%f0,%f2
+/* 0x0248 */ std %f2,[%i0+%o2]
+/* 0x024c */ sllx %o3,2,%o2
+/* 0x0250 156 */ st %o1,[%sp+2223]
+/* 0x0254 */ fmovs %f0,%f2
+/* 0x0258 */ ld [%sp+2223],%f3
+/* 0x025c */ fsubd %f2,%f0,%f0
+/* 0x0260 */ std %f0,[%i0+%o0]
+/* 0x0264 */ ble,a,pt %icc,.L900000311
+/* 0x0268 */ ldd [%o5],%f0
+ .L77000150:
+/* 0x026c */ ret ! Result =
+/* 0x0270 */ restore %g0,%g0,%g0
+/* 0x0274 0 */ .type conv_i32_to_d16,2
+/* 0x0274 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16:
+/* 000000 */ save %sp,-192,%sp
+ .L900000415:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+
+! 157 ! }
+! 158 !}
+! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 162 ! unsigned int *i32, int len)
+! 163 !{
+! 164 !int i = 0;
+! 165 !unsigned int a;
+! 167 !#pragma pipeloop(0)
+! 168 !#ifdef RF_INLINE_MACROS
+! 169 ! for(;i<len-3;i+=4)
+! 170 ! {
+! 171 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 172 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
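+! The main loop below is effectively the i16_to_d16_and_d32x4
+! template from the .il file expanded inline, handling four words per
+! iteration and filling d32 and d16 in a single pass; the scalar code
+! reached through .L900000418 finishes any remaining words with the
+! 2^52 bias trick.
+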
+/* 0x000c 172 */ sethi %hi(Zero),%g2
+/* 0x0010 163 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+/* 0x0014 */ or %g0,%i3,%g5
+/* 0x0018 */ add %g3,%o7,%o3
+/* 0x001c 172 */ add %g2,%lo(Zero),%g2
+/* 0x0020 */ ldx [%o3+%g2],%o0
+/* 0x0024 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0028 163 */ or %g0,%i0,%i3
+/* 0x002c 169 */ sub %g5,3,%o1
+/* 0x0030 172 */ sethi %hi(TwoTo16),%g4
+/* 0x0034 163 */ or %g0,%i2,%i0
+/* 0x0038 172 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x003c */ ldx [%o3+%g2],%o2
+/* 0x0040 169 */ cmp %o1,0
+/* 0x0044 164 */ or %g0,0,%i2
+/* 0x0048 169 */ ble,pt %icc,.L900000418
+/* 0x004c */ cmp %i2,%g5
+/* 0x0050 */ ldd [%o0],%f2
+/* 0x0054 172 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0058 */ ldx [%o3+%g3],%o1
+/* 0x005c 169 */ sub %g5,4,%o4
+/* 0x0060 */ or %g0,0,%o5
+ .L900000417:
+/* 0x0064 172 */ sra %i2,0,%g2
+/* 0x0068 */ fmovd %f2,%f14
+/* 0x006c */ ldd [%o2],%f0
+/* 0x0070 */ sllx %g2,2,%g3
+/* 0x0074 */ fmovd %f2,%f10
+/* 0x0078 */ ldd [%o1],%f16
+/* 0x007c */ ld [%g3+%i0],%f15
+/* 0x0080 */ add %i0,%g3,%g3
+/* 0x0084 */ fmovd %f2,%f6
+/* 0x0088 */ ld [%g3+4],%f11
+/* 0x008c */ sra %o5,0,%g4
+/* 0x0090 */ add %i2,4,%i2
+/* 0x0094 */ ld [%g3+8],%f7
+/* 0x0098 */ fxtod %f14,%f14
+/* 0x009c */ sllx %g2,3,%g2
+/* 0x00a0 */ ld [%g3+12],%f3
+/* 0x00a4 */ fxtod %f10,%f10
+/* 0x00a8 */ sllx %g4,3,%g3
+/* 0x00ac */ fxtod %f6,%f6
+/* 0x00b0 */ std %f14,[%g2+%i3]
+/* 0x00b4 */ add %i3,%g2,%g4
+/* 0x00b8 */ fxtod %f2,%f2
+/* 0x00bc */ fmuld %f0,%f14,%f12
+/* 0x00c0 */ std %f2,[%g4+24]
+/* 0x00c4 */ fmuld %f0,%f10,%f8
+/* 0x00c8 */ std %f10,[%g4+8]
+/* 0x00cc */ add %i1,%g3,%g2
+/* 0x00d0 */ fmuld %f0,%f6,%f4
+/* 0x00d4 */ std %f6,[%g4+16]
+/* 0x00d8 */ cmp %i2,%o4
+/* 0x00dc */ fmuld %f0,%f2,%f0
+/* 0x00e0 */ fdtox %f12,%f12
+/* 0x00e4 */ add %o5,8,%o5
+/* 0x00e8 */ fdtox %f8,%f8
+/* 0x00ec */ fdtox %f4,%f4
+/* 0x00f0 */ fdtox %f0,%f0
+/* 0x00f4 */ fxtod %f12,%f12
+/* 0x00f8 */ std %f12,[%g2+8]
+/* 0x00fc */ fxtod %f8,%f8
+/* 0x0100 */ std %f8,[%g2+24]
+/* 0x0104 */ fxtod %f4,%f4
+/* 0x0108 */ std %f4,[%g2+40]
+/* 0x010c */ fxtod %f0,%f0
+/* 0x0110 */ std %f0,[%g2+56]
+/* 0x0114 */ fmuld %f12,%f16,%f12
+/* 0x0118 */ fmuld %f8,%f16,%f8
+/* 0x011c */ fmuld %f4,%f16,%f4
+/* 0x0120 */ fsubd %f14,%f12,%f12
+/* 0x0124 */ std %f12,[%g3+%i1]
+/* 0x0128 */ fmuld %f0,%f16,%f0
+/* 0x012c */ fsubd %f10,%f8,%f8
+/* 0x0130 */ std %f8,[%g2+16]
+/* 0x0134 */ fsubd %f6,%f4,%f4
+/* 0x0138 */ std %f4,[%g2+32]
+/* 0x013c */ fsubd %f2,%f0,%f0
+/* 0x0140 */ std %f0,[%g2+48]
+/* 0x0144 */ ble,a,pt %icc,.L900000417
+/* 0x0148 */ ldd [%o0],%f2
+ .L77000159:
+
+! 173 ! }
+! 174 !#endif
+! 175 ! for(;i<len;i++)
+
+/* 0x014c 175 */ cmp %i2,%g5
+ .L900000418:
+/* 0x0150 175 */ bge,pt %icc,.L77000164
+/* 0x0154 */ nop
+
+! 176 ! {
+! 177 ! a=i32[i];
+! 178 ! d32[i]=(double)(i32[i]);
+! 179 ! d16[2*i]=(double)(a&0xffff);
+! 180 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2
+/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%g2
+/* 0x0160 175 */ sethi %hi(0xfc00),%g3
+/* 0x0164 180 */ ldx [%o3+%g2],%g1
+/* 0x0168 175 */ sll %i2,1,%i4
+/* 0x016c */ sub %g5,%i2,%g4
+/* 0x0170 177 */ sra %i2,0,%o3
+/* 0x0174 175 */ add %g3,1023,%g3
+/* 0x0178 178 */ ldd [%g1],%f2
+/* 0x017c */ sllx %o3,2,%o2
+/* 0x0180 175 */ add %i4,1,%g2
+/* 0x0184 177 */ or %g0,%o3,%o1
+/* 0x0188 */ cmp %g4,6
+/* 0x018c 175 */ bl,pn %icc,.L77000161
+/* 0x0190 */ sra %i2,0,%o3
+/* 0x0194 177 */ or %g0,%o2,%o0
+/* 0x0198 178 */ ld [%i0+%o2],%f5
+/* 0x019c 179 */ fmovs %f2,%f8
+/* 0x01a0 175 */ add %o0,4,%o3
+/* 0x01a4 177 */ ld [%i0+%o0],%o7
+/* 0x01a8 180 */ fmovs %f2,%f6
+/* 0x01ac 178 */ fmovs %f2,%f4
+/* 0x01b0 */ sllx %o1,3,%o2
+/* 0x01b4 175 */ add %o3,4,%o5
+/* 0x01b8 179 */ sra %i4,0,%o0
+/* 0x01bc 175 */ add %o3,8,%o4
+/* 0x01c0 178 */ fsubd %f4,%f2,%f4
+/* 0x01c4 */ std %f4,[%i3+%o2]
+/* 0x01c8 179 */ sllx %o0,3,%i5
+/* 0x01cc */ and %o7,%g3,%o0
+/* 0x01d0 */ st %o0,[%sp+2227]
+/* 0x01d4 175 */ add %i5,16,%o1
+/* 0x01d8 180 */ srl %o7,16,%g4
+/* 0x01dc */ add %i2,1,%i2
+/* 0x01e0 */ sra %g2,0,%o0
+/* 0x01e4 175 */ add %o2,8,%o2
+/* 0x01e8 179 */ fmovs %f2,%f4
+/* 0x01ec 180 */ sllx %o0,3,%l0
+/* 0x01f0 */ add %i4,3,%g2
+/* 0x01f4 179 */ ld [%sp+2227],%f5
+/* 0x01f8 175 */ add %l0,16,%o0
+/* 0x01fc 180 */ add %i4,2,%i4
+/* 0x0200 175 */ sub %g5,1,%o7
+/* 0x0204 180 */ add %i2,3,%i2
+/* 0x0208 179 */ fsubd %f4,%f2,%f4
+/* 0x020c */ std %f4,[%i1+%i5]
+/* 0x0210 180 */ st %g4,[%sp+2223]
+/* 0x0214 177 */ ld [%i0+%o3],%i5
+/* 0x0218 180 */ fmovs %f2,%f4
+/* 0x021c */ srl %i5,16,%g4
+/* 0x0220 179 */ and %i5,%g3,%i5
+/* 0x0224 180 */ ld [%sp+2223],%f5
+/* 0x0228 */ fsubd %f4,%f2,%f4
+/* 0x022c */ std %f4,[%i1+%l0]
+/* 0x0230 */ st %g4,[%sp+2223]
+/* 0x0234 177 */ ld [%i0+%o5],%g4
+/* 0x0238 179 */ st %i5,[%sp+2227]
+/* 0x023c 178 */ fmovs %f2,%f4
+/* 0x0240 180 */ srl %g4,16,%i5
+/* 0x0244 179 */ and %g4,%g3,%g4
+/* 0x0248 180 */ ld [%sp+2223],%f7
+/* 0x024c */ st %i5,[%sp+2223]
+/* 0x0250 178 */ ld [%i0+%o3],%f5
+/* 0x0254 180 */ fsubd %f6,%f2,%f6
+/* 0x0258 177 */ ld [%i0+%o4],%o3
+/* 0x025c 178 */ fsubd %f4,%f2,%f4
+/* 0x0260 179 */ ld [%sp+2227],%f9
+/* 0x0264 180 */ ld [%sp+2223],%f1
+/* 0x0268 179 */ st %g4,[%sp+2227]
+/* 0x026c */ fsubd %f8,%f2,%f8
+/* 0x0270 */ std %f8,[%i1+%o1]
+/* 0x0274 180 */ std %f6,[%i1+%o0]
+/* 0x0278 178 */ std %f4,[%i3+%o2]
+ .L900000411:
+/* 0x027c 179 */ ld [%sp+2227],%f13
+/* 0x0280 180 */ srl %o3,16,%g4
+/* 0x0284 */ add %i2,2,%i2
+/* 0x0288 */ st %g4,[%sp+2223]
+/* 0x028c */ cmp %i2,%o7
+/* 0x0290 */ add %g2,4,%g2
+/* 0x0294 178 */ ld [%i0+%o5],%f11
+/* 0x0298 180 */ add %i4,4,%i4
+/* 0x029c 175 */ add %o4,4,%o5
+/* 0x02a0 177 */ ld [%i0+%o5],%g4
+/* 0x02a4 179 */ and %o3,%g3,%o3
+/* 0x02a8 */ st %o3,[%sp+2227]
+/* 0x02ac 180 */ fmovs %f2,%f0
+/* 0x02b0 179 */ fmovs %f2,%f12
+/* 0x02b4 180 */ fsubd %f0,%f2,%f8
+/* 0x02b8 179 */ fsubd %f12,%f2,%f4
+/* 0x02bc 175 */ add %o1,16,%o1
+/* 0x02c0 180 */ ld [%sp+2223],%f7
+/* 0x02c4 178 */ fmovs %f2,%f10
+/* 0x02c8 179 */ std %f4,[%i1+%o1]
+/* 0x02cc 175 */ add %o0,16,%o0
+/* 0x02d0 178 */ fsubd %f10,%f2,%f4
+/* 0x02d4 175 */ add %o2,8,%o2
+/* 0x02d8 180 */ std %f8,[%i1+%o0]
+/* 0x02dc 178 */ std %f4,[%i3+%o2]
+/* 0x02e0 179 */ ld [%sp+2227],%f9
+/* 0x02e4 180 */ srl %g4,16,%o3
+/* 0x02e8 */ st %o3,[%sp+2223]
+/* 0x02ec 178 */ ld [%i0+%o4],%f5
+/* 0x02f0 175 */ add %o4,8,%o4
+/* 0x02f4 177 */ ld [%i0+%o4],%o3
+/* 0x02f8 179 */ and %g4,%g3,%g4
+/* 0x02fc */ st %g4,[%sp+2227]
+/* 0x0300 180 */ fmovs %f2,%f6
+/* 0x0304 179 */ fmovs %f2,%f8
+/* 0x0308 180 */ fsubd %f6,%f2,%f6
+/* 0x030c 179 */ fsubd %f8,%f2,%f8
+/* 0x0310 175 */ add %o1,16,%o1
+/* 0x0314 180 */ ld [%sp+2223],%f1
+/* 0x0318 178 */ fmovs %f2,%f4
+/* 0x031c 179 */ std %f8,[%i1+%o1]
+/* 0x0320 175 */ add %o0,16,%o0
+/* 0x0324 178 */ fsubd %f4,%f2,%f4
+/* 0x0328 175 */ add %o2,8,%o2
+/* 0x032c 180 */ std %f6,[%i1+%o0]
+/* 0x0330 */ bl,pt %icc,.L900000411
+/* 0x0334 */ std %f4,[%i3+%o2]
+ .L900000414:
+/* 0x0338 180 */ srl %o3,16,%o7
+/* 0x033c */ st %o7,[%sp+2223]
+/* 0x0340 179 */ fmovs %f2,%f12
+/* 0x0344 178 */ ld [%i0+%o5],%f11
+/* 0x0348 180 */ fmovs %f2,%f0
+/* 0x034c 179 */ and %o3,%g3,%g4
+/* 0x0350 180 */ fmovs %f2,%f6
+/* 0x0354 175 */ add %o1,16,%o3
+/* 0x0358 */ add %o0,16,%o7
+/* 0x035c 178 */ fmovs %f2,%f10
+/* 0x0360 175 */ add %o2,8,%o2
+/* 0x0364 */ add %o1,32,%o5
+/* 0x0368 179 */ ld [%sp+2227],%f13
+/* 0x036c 178 */ fmovs %f2,%f4
+/* 0x0370 175 */ add %o0,32,%o1
+/* 0x0374 180 */ ld [%sp+2223],%f7
+/* 0x0378 175 */ add %o2,8,%o0
+/* 0x037c 180 */ cmp %i2,%g5
+/* 0x0380 179 */ st %g4,[%sp+2227]
+/* 0x0384 */ fsubd %f12,%f2,%f8
+/* 0x0388 180 */ add %g2,6,%g2
+/* 0x038c 179 */ std %f8,[%i1+%o3]
+/* 0x0390 180 */ fsubd %f0,%f2,%f0
+/* 0x0394 177 */ sra %i2,0,%o3
+/* 0x0398 180 */ std %f0,[%i1+%o7]
+/* 0x039c 178 */ fsubd %f10,%f2,%f0
+/* 0x03a0 180 */ add %i4,6,%i4
+/* 0x03a4 178 */ std %f0,[%i3+%o2]
+/* 0x03a8 */ sllx %o3,2,%o2
+/* 0x03ac 179 */ ld [%sp+2227],%f9
+/* 0x03b0 178 */ ld [%i0+%o4],%f5
+/* 0x03b4 179 */ fmovs %f2,%f8
+/* 0x03b8 */ fsubd %f8,%f2,%f0
+/* 0x03bc */ std %f0,[%i1+%o5]
+/* 0x03c0 180 */ fsubd %f6,%f2,%f0
+/* 0x03c4 */ std %f0,[%i1+%o1]
+/* 0x03c8 178 */ fsubd %f4,%f2,%f0
+/* 0x03cc 180 */ bge,pn %icc,.L77000164
+/* 0x03d0 */ std %f0,[%i3+%o0]
+ .L77000161:
+/* 0x03d4 178 */ ldd [%g1],%f2
+ .L900000416:
+/* 0x03d8 178 */ ld [%i0+%o2],%f5
+/* 0x03dc 179 */ sra %i4,0,%o0
+/* 0x03e0 180 */ add %i2,1,%i2
+/* 0x03e4 177 */ ld [%i0+%o2],%o1
+/* 0x03e8 178 */ sllx %o3,3,%o3
+/* 0x03ec 180 */ add %i4,2,%i4
+/* 0x03f0 178 */ fmovs %f2,%f4
+/* 0x03f4 179 */ sllx %o0,3,%o4
+/* 0x03f8 180 */ cmp %i2,%g5
+/* 0x03fc 179 */ and %o1,%g3,%o0
+/* 0x0400 178 */ fsubd %f4,%f2,%f0
+/* 0x0404 */ std %f0,[%i3+%o3]
+/* 0x0408 180 */ srl %o1,16,%o1
+/* 0x040c 179 */ st %o0,[%sp+2227]
+/* 0x0410 180 */ sra %g2,0,%o0
+/* 0x0414 */ add %g2,2,%g2
+/* 0x0418 177 */ sra %i2,0,%o3
+/* 0x041c 180 */ sllx %o0,3,%o0
+/* 0x0420 179 */ fmovs %f2,%f4
+/* 0x0424 */ sllx %o3,2,%o2
+/* 0x0428 */ ld [%sp+2227],%f5
+/* 0x042c */ fsubd %f4,%f2,%f0
+/* 0x0430 */ std %f0,[%i1+%o4]
+/* 0x0434 180 */ st %o1,[%sp+2223]
+/* 0x0438 */ fmovs %f2,%f4
+/* 0x043c */ ld [%sp+2223],%f5
+/* 0x0440 */ fsubd %f4,%f2,%f0
+/* 0x0444 */ std %f0,[%i1+%o0]
+/* 0x0448 */ bl,a,pt %icc,.L900000416
+/* 0x044c */ ldd [%g1],%f2
+ .L77000164:
+/* 0x0450 */ ret ! Result =
+/* 0x0454 */ restore %g0,%g0,%g0
+/* 0x0458 0 */ .type conv_i32_to_d32_and_d16,2
+/* 0x0458 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! SUBROUTINE adjust_montf_result
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global adjust_montf_result
+ adjust_montf_result:
+/* 000000 */ save %sp,-176,%sp
+/* 0x0004 */ or %g0,%i2,%o1
+/* 0x0008 */ or %g0,%i0,%i2
+
+! 181 ! }
+! 182 !}
+! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 186 !{
+! 187 !long long acc;
+! 188 !int i;
+! 190 ! if(i32[len]>0) i=-1;
+
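+! Rationale: the Montgomery product produced by mont_mulf_noconv is,
+! by the standard argument, less than twice the modulus, so at most
+! one subtraction of nint is ever needed.  The code subtracts exactly
+! once, with 64-bit borrow propagation, when the result has overflowed
+! into i32[len] or compares greater than nint from the most
+! significant word down.
+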
+/* 0x000c 190 */ sra %o1,0,%g2
+/* 0x0010 */ or %g0,-1,%o2
+/* 0x0014 */ sllx %g2,2,%g2
+/* 0x0018 */ ld [%i2+%g2],%g2
+/* 0x001c */ cmp %g2,0
+/* 0x0020 */ bleu,pn %icc,.L77000175
+/* 0x0024 */ or %g0,%i1,%i0
+/* 0x0028 */ ba .L900000511
+/* 0x002c */ cmp %o2,0
+ .L77000175:
+
+! 191 ! else
+! 192 ! {
+! 193 ! for(i=len-1; i>=0; i--)
+
+/* 0x0030 193 */ sub %o1,1,%o2
+/* 0x0034 */ cmp %o2,0
+/* 0x0038 */ bl,pn %icc,.L77000182
+/* 0x003c */ sra %o2,0,%g2
+ .L900000510:
+
+! 194 ! {
+! 195 ! if(i32[i]!=nint[i]) break;
+
+/* 0x0040 195 */ sllx %g2,2,%g2
+/* 0x0044 */ sub %o2,1,%o0
+/* 0x0048 */ ld [%i1+%g2],%g3
+/* 0x004c */ ld [%i2+%g2],%g2
+/* 0x0050 */ cmp %g2,%g3
+/* 0x0054 */ bne,pn %icc,.L77000182
+/* 0x0058 */ nop
+/* 0x005c 0 */ or %g0,%o0,%o2
+/* 0x0060 195 */ cmp %o0,0
+/* 0x0064 */ bge,pt %icc,.L900000510
+/* 0x0068 */ sra %o2,0,%g2
+ .L77000182:
+
+! 196 ! }
+! 197 ! }
+! 198 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x006c 198 */ cmp %o2,0
+ .L900000511:
+/* 0x0070 198 */ bl,pn %icc,.L77000198
+/* 0x0074 */ sra %o2,0,%g2
+/* 0x0078 */ sllx %g2,2,%g2
+/* 0x007c */ ld [%i1+%g2],%g3
+/* 0x0080 */ ld [%i2+%g2],%g2
+/* 0x0084 */ cmp %g2,%g3
+/* 0x0088 */ bleu,pt %icc,.L77000191
+/* 0x008c */ nop
+ .L77000198:
+
+! 199 ! {
+! 200 ! acc=0;
+! 201 ! for(i=0;i<len;i++)
+
+/* 0x0090 201 */ cmp %o1,0
+/* 0x0094 */ ble,pt %icc,.L77000191
+/* 0x0098 */ nop
+/* 0x009c 198 */ or %g0,-1,%g2
+/* 0x00a0 201 */ or %g0,%o1,%g3
+/* 0x00a4 198 */ srl %g2,0,%g2
+/* 0x00a8 */ sub %o1,1,%g4
+/* 0x00ac */ cmp %o1,9
+/* 0x00b0 201 */ or %g0,0,%i1
+/* 0x00b4 200 */ or %g0,0,%g5
+
+! 202 ! {
+! 203 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b8 203 */ or %g0,0,%o1
+/* 0x00bc 201 */ bl,pn %icc,.L77000199
+/* 0x00c0 */ sub %g3,4,%o7
+/* 0x00c4 203 */ ld [%i2],%o1
+
+! 204 ! i32[i]=acc&0xffffffff;
+! 205 ! acc=acc>>32;
+
+/* 0x00c8 205 */ or %g0,5,%i1
+/* 0x00cc 203 */ ld [%i0],%o2
+/* 0x00d0 201 */ or %g0,8,%o5
+/* 0x00d4 */ or %g0,12,%o4
+/* 0x00d8 203 */ ld [%i0+4],%o3
+/* 0x00dc 201 */ or %g0,16,%g1
+/* 0x00e0 203 */ ld [%i2+4],%o0
+/* 0x00e4 201 */ sub %o1,%o2,%o1
+/* 0x00e8 203 */ ld [%i0+8],%i3
+/* 0x00ec 204 */ and %o1,%g2,%g5
+/* 0x00f0 */ st %g5,[%i2]
+/* 0x00f4 205 */ srax %o1,32,%g5
+/* 0x00f8 201 */ sub %o0,%o3,%o0
+/* 0x00fc 203 */ ld [%i0+12],%o2
+/* 0x0100 201 */ add %o0,%g5,%o0
+/* 0x0104 204 */ and %o0,%g2,%g5
+/* 0x0108 */ st %g5,[%i2+4]
+/* 0x010c 205 */ srax %o0,32,%o0
+/* 0x0110 203 */ ld [%i2+8],%o1
+/* 0x0114 */ ld [%i2+12],%o3
+/* 0x0118 201 */ sub %o1,%i3,%o1
+ .L900000505:
+/* 0x011c */ add %g1,4,%g3
+/* 0x0120 203 */ ld [%g1+%i2],%g5
+/* 0x0124 201 */ add %o1,%o0,%o0
+/* 0x0128 203 */ ld [%i0+%g1],%i3
+/* 0x012c 201 */ sub %o3,%o2,%o1
+/* 0x0130 204 */ and %o0,%g2,%o2
+/* 0x0134 */ st %o2,[%o5+%i2]
+/* 0x0138 205 */ srax %o0,32,%o2
+/* 0x013c */ add %i1,4,%i1
+/* 0x0140 201 */ add %g1,8,%o5
+/* 0x0144 203 */ ld [%g3+%i2],%o0
+/* 0x0148 201 */ add %o1,%o2,%o1
+/* 0x014c 203 */ ld [%i0+%g3],%o3
+/* 0x0150 201 */ sub %g5,%i3,%o2
+/* 0x0154 204 */ and %o1,%g2,%g5
+/* 0x0158 */ st %g5,[%o4+%i2]
+/* 0x015c 205 */ srax %o1,32,%g5
+/* 0x0160 */ cmp %i1,%o7
+/* 0x0164 201 */ add %g1,12,%o4
+/* 0x0168 203 */ ld [%o5+%i2],%o1
+/* 0x016c 201 */ add %o2,%g5,%o2
+/* 0x0170 203 */ ld [%i0+%o5],%i3
+/* 0x0174 201 */ sub %o0,%o3,%o0
+/* 0x0178 204 */ and %o2,%g2,%o3
+/* 0x017c */ st %o3,[%g1+%i2]
+/* 0x0180 205 */ srax %o2,32,%g5
+/* 0x0184 203 */ ld [%o4+%i2],%o3
+/* 0x0188 201 */ add %g1,16,%g1
+/* 0x018c */ add %o0,%g5,%o0
+/* 0x0190 203 */ ld [%i0+%o4],%o2
+/* 0x0194 201 */ sub %o1,%i3,%o1
+/* 0x0198 204 */ and %o0,%g2,%g5
+/* 0x019c */ st %g5,[%g3+%i2]
+/* 0x01a0 205 */ ble,pt %icc,.L900000505
+/* 0x01a4 */ srax %o0,32,%o0
+ .L900000508:
+/* 0x01a8 */ add %o1,%o0,%g3
+/* 0x01ac */ sub %o3,%o2,%o1
+/* 0x01b0 203 */ ld [%g1+%i2],%o0
+/* 0x01b4 */ ld [%i0+%g1],%o2
+/* 0x01b8 205 */ srax %g3,32,%o7
+/* 0x01bc 204 */ and %g3,%g2,%o3
+/* 0x01c0 201 */ add %o1,%o7,%o1
+/* 0x01c4 204 */ st %o3,[%o5+%i2]
+/* 0x01c8 205 */ cmp %i1,%g4
+/* 0x01cc 201 */ sub %o0,%o2,%o0
+/* 0x01d0 204 */ and %o1,%g2,%o2
+/* 0x01d4 */ st %o2,[%o4+%i2]
+/* 0x01d8 205 */ srax %o1,32,%o1
+/* 0x01dc 203 */ sra %i1,0,%o2
+/* 0x01e0 201 */ add %o0,%o1,%o0
+/* 0x01e4 205 */ srax %o0,32,%g5
+/* 0x01e8 204 */ and %o0,%g2,%o1
+/* 0x01ec */ st %o1,[%g1+%i2]
+/* 0x01f0 205 */ bg,pn %icc,.L77000191
+/* 0x01f4 */ sllx %o2,2,%o1
+ .L77000199:
+/* 0x01f8 0 */ or %g0,%o1,%g1
+ .L900000509:
+/* 0x01fc 203 */ ld [%o1+%i2],%o0
+/* 0x0200 205 */ add %i1,1,%i1
+/* 0x0204 203 */ ld [%i0+%o1],%o1
+/* 0x0208 */ sra %i1,0,%o2
+/* 0x020c 205 */ cmp %i1,%g4
+/* 0x0210 203 */ add %g5,%o0,%o0
+/* 0x0214 */ sub %o0,%o1,%o0
+/* 0x0218 205 */ srax %o0,32,%g5
+/* 0x021c 204 */ and %o0,%g2,%o1
+/* 0x0220 */ st %o1,[%g1+%i2]
+/* 0x0224 */ sllx %o2,2,%o1
+/* 0x0228 205 */ ble,pt %icc,.L900000509
+/* 0x022c */ or %g0,%o1,%g1
+ .L77000191:
+/* 0x0230 */ ret ! Result =
+/* 0x0234 */ restore %g0,%g0,%g0
+/* 0x0238 0 */ .type adjust_montf_result,2
+/* 0x0238 */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+/* 000000 */ .skip 24
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv:
+/* 000000 */ save %sp,-224,%sp
+ .L900000643:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x000c */ ldx [%fp+2223],%l0
+
+! 206 ! }
+! 207 ! }
+! 208 !}
+! 213 !/*
+! 214 !** the lengths of the input arrays should be at least the following:
+! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 216 !** all of them should be different from one another
+! 217 !**
+! 218 !*/
+! 219 !void mont_mulf_noconv(unsigned int *result,
+! 220 ! double *dm1, double *dm2, double *dt,
+! 221 ! double *dn, unsigned int *nint,
+! 222 ! int nlen, double dn0)
+! 223 !{
+! 224 ! int i, j, jj;
+! 225 ! int tmp;
+! 226 ! double digit, m2j, nextm2j, a, b;
+! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 229 ! pdm1=&(dm1[0]);
+! 230 ! pdm2=&(dm2[0]);
+! 231 ! pdn=&(dn[0]);
+! 232 ! pdm2[2*nlen]=Zero;
+
+/* 0x0010 232 */ sethi %hi(Zero),%g2
+/* 0x0014 223 */ fmovd %f14,%f30
+/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x001c 232 */ add %g2,%lo(Zero),%g2
+/* 0x0020 */ sll %l0,1,%o3
+/* 0x0024 223 */ add %g5,%o7,%o4
+/* 0x0028 232 */ sra %o3,0,%g5
+/* 0x002c */ ldx [%o4+%g2],%o7
+
+! 234 ! if (nlen!=16)
+! 235 ! {
+! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 238 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0030 239 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0034 */ sethi %hi(TwoTo16),%g4
+/* 0x0038 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x003c 232 */ ldd [%o7],%f0
+/* 0x0040 239 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0044 223 */ or %g0,%i4,%o0
+/* 0x0048 232 */ sllx %g5,3,%g4
+/* 0x004c 239 */ ldx [%o4+%g2],%o5
+/* 0x0050 223 */ or %g0,%i5,%l3
+/* 0x0054 */ or %g0,%i0,%l2
+/* 0x0058 239 */ ldx [%o4+%g3],%o4
+/* 0x005c 234 */ cmp %l0,16
+/* 0x0060 232 */ std %f0,[%i2+%g4]
+/* 0x0064 234 */ be,pn %icc,.L77000279
+/* 0x0068 */ or %g0,%i3,%l4
+/* 0x006c 236 */ sll %l0,2,%g2
+/* 0x0070 223 */ or %g0,%o0,%i5
+/* 0x0074 236 */ add %g2,2,%o0
+/* 0x0078 223 */ or %g0,%i1,%i4
+/* 0x007c 236 */ cmp %o0,0
+/* 0x0080 223 */ or %g0,%i2,%l1
+/* 0x0084 236 */ ble,a,pt %icc,.L900000657
+/* 0x0088 */ ldd [%i1],%f6
+
+! 241 ! pdtj=&(dt[0]);
+! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 243 ! {
+! 244 ! m2j=pdm2[j];
+! 245 ! a=pdtj[0]+pdn[0]*digit;
+! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 247 ! pdtj[1]=b;
+! 249 !#pragma pipeloop(0)
+! 250 ! for(i=1;i<nlen;i++)
+! 251 ! {
+! 252 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 253 ! }
+! 254 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 255 !
+! 256 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 257 ! }
+! 258 ! }
+! 259 ! else
+! 260 ! {
+! 261 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 263 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 264 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 265 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 266 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 267 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 268 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 269 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 270 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 271 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 272 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 273 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 275 ! pdn_0=pdn[0];
+! 276 ! pdm1_0=pdm1[0];
+! 278 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 279 ! pdtj=&(dt[0]);
+! 281 ! for(j=0;j<32;j++,pdtj++)
+
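+! Exactness note: with 16-bit digits held in doubles, each fmuld
+! product fits in 32 bits, so the pdtj accumulators grow slowly and
+! stay well inside the 53-bit mantissa between renormalizations.  The
+! general path calls cleanup() every 30 iterations (jj==30) to
+! carry-propagate before precision could be lost; the nlen==16 fast
+! path taken at the be,pn above fully unrolls the inner loop instead.
+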
+/* 0x008c 281 */ or %g0,%o0,%o1
+/* 0x0090 236 */ sub %o0,1,%g1
+/* 0x0094 */ or %g0,0,%g2
+/* 0x0098 281 */ cmp %o1,5
+/* 0x009c */ bl,pn %icc,.L77000280
+/* 0x00a0 */ or %g0,8,%o0
+/* 0x00a4 */ std %f0,[%i3]
+/* 0x00a8 */ or %g0,2,%g2
+/* 0x00ac */ sub %g1,2,%o1
+ .L900000627:
+/* 0x00b0 */ add %o0,8,%g3
+/* 0x00b4 */ std %f0,[%i3+%o0]
+/* 0x00b8 */ add %g2,3,%g2
+/* 0x00bc */ add %o0,16,%o2
+/* 0x00c0 */ std %f0,[%i3+%g3]
+/* 0x00c4 */ cmp %g2,%o1
+/* 0x00c8 */ add %o0,24,%o0
+/* 0x00cc */ ble,pt %icc,.L900000627
+/* 0x00d0 */ std %f0,[%i3+%o2]
+ .L900000630:
+/* 0x00d4 */ cmp %g2,%g1
+/* 0x00d8 */ bg,pn %icc,.L77000285
+/* 0x00dc */ std %f0,[%i3+%o0]
+ .L77000280:
+/* 0x00e0 */ ldd [%o7],%f0
+ .L900000656:
+/* 0x00e4 */ sra %g2,0,%o0
+/* 0x00e8 */ add %g2,1,%g2
+/* 0x00ec */ sllx %o0,3,%o0
+/* 0x00f0 */ cmp %g2,%g1
+/* 0x00f4 */ std %f0,[%i3+%o0]
+/* 0x00f8 */ ble,a,pt %icc,.L900000656
+/* 0x00fc */ ldd [%o7],%f0
+ .L77000285:
+/* 0x0100 238 */ ldd [%i1],%f6
+ .L900000657:
+/* 0x0104 238 */ ldd [%i2],%f8
+/* 0x0108 242 */ cmp %o3,0
+/* 0x010c */ sub %o3,1,%o1
+/* 0x0110 239 */ ldd [%o7],%f10
+/* 0x0114 */ add %o3,1,%o2
+/* 0x0118 0 */ or %g0,0,%i2
+/* 0x011c 238 */ fmuld %f6,%f8,%f6
+/* 0x0120 */ std %f6,[%i3]
+/* 0x0124 0 */ or %g0,0,%g3
+/* 0x0128 239 */ ldd [%o5],%f8
+/* 0x012c 0 */ or %g0,%o2,%g1
+/* 0x0130 236 */ sub %l0,1,%i1
+/* 0x0134 239 */ ldd [%o4],%f12
+/* 0x0138 236 */ or %g0,1,%g4
+/* 0x013c */ fdtox %f6,%f0
+/* 0x0140 */ fmovs %f10,%f0
+/* 0x0144 */ fxtod %f0,%f6
+/* 0x0148 239 */ fmuld %f6,%f14,%f6
+/* 0x014c */ fmuld %f6,%f8,%f8
+/* 0x0150 */ fdtox %f8,%f8
+/* 0x0154 */ fxtod %f8,%f8
+/* 0x0158 */ fmuld %f8,%f12,%f8
+/* 0x015c */ fsubd %f6,%f8,%f20
+/* 0x0160 242 */ ble,pt %icc,.L900000650
+/* 0x0164 */ sllx %g5,3,%g2
+/* 0x0168 0 */ st %o1,[%sp+2223]
+/* 0x016c 246 */ ldd [%i5],%f6
+ .L900000651:
+/* 0x0170 246 */ sra %g4,0,%g2
+/* 0x0174 */ fmuld %f6,%f20,%f6
+/* 0x0178 */ ldd [%i3],%f12
+/* 0x017c */ sllx %g2,3,%g2
+/* 0x0180 */ ldd [%i4],%f8
+/* 0x0184 250 */ cmp %l0,1
+/* 0x0188 246 */ ldd [%l1+%g2],%f10
+/* 0x018c 244 */ sra %i2,0,%g2
+/* 0x0190 */ add %i2,1,%i0
+/* 0x0194 246 */ faddd %f12,%f6,%f6
+/* 0x0198 */ ldd [%o5],%f12
+/* 0x019c 244 */ sllx %g2,3,%g2
+/* 0x01a0 246 */ fmuld %f8,%f10,%f8
+/* 0x01a4 */ ldd [%i3+8],%f10
+/* 0x01a8 */ srl %i2,31,%o3
+/* 0x01ac 244 */ ldd [%l1+%g2],%f18
+/* 0x01b0 0 */ or %g0,1,%l5
+/* 0x01b4 236 */ or %g0,2,%g2
+/* 0x01b8 246 */ fmuld %f6,%f12,%f6
+/* 0x01bc 250 */ or %g0,32,%o1
+/* 0x01c0 */ or %g0,48,%o2
+/* 0x01c4 246 */ faddd %f10,%f8,%f8
+/* 0x01c8 */ faddd %f8,%f6,%f16
+/* 0x01cc 250 */ ble,pn %icc,.L77000213
+/* 0x01d0 */ std %f16,[%i3+8]
+/* 0x01d4 */ cmp %i1,8
+/* 0x01d8 */ sub %l0,3,%o3
+/* 0x01dc */ bl,pn %icc,.L77000284
+/* 0x01e0 */ or %g0,8,%o0
+/* 0x01e4 252 */ ldd [%i5+8],%f0
+/* 0x01e8 */ or %g0,6,%l5
+/* 0x01ec */ ldd [%i4+8],%f2
+/* 0x01f0 */ or %g0,4,%g2
+/* 0x01f4 250 */ or %g0,40,%o0
+/* 0x01f8 252 */ ldd [%i5+16],%f8
+/* 0x01fc */ fmuld %f0,%f20,%f10
+/* 0x0200 */ ldd [%i4+16],%f4
+/* 0x0204 */ fmuld %f2,%f18,%f2
+/* 0x0208 */ ldd [%i3+16],%f0
+/* 0x020c */ fmuld %f8,%f20,%f12
+/* 0x0210 */ ldd [%i4+24],%f6
+/* 0x0214 */ fmuld %f4,%f18,%f4
+/* 0x0218 */ ldd [%i5+24],%f8
+/* 0x021c */ faddd %f2,%f10,%f2
+/* 0x0220 */ ldd [%i4+32],%f14
+/* 0x0224 */ fmuld %f6,%f18,%f10
+/* 0x0228 */ ldd [%i5+32],%f6
+/* 0x022c */ faddd %f4,%f12,%f4
+/* 0x0230 */ ldd [%i4+40],%f12
+/* 0x0234 */ faddd %f0,%f2,%f0
+/* 0x0238 */ std %f0,[%i3+16]
+/* 0x023c */ ldd [%i3+32],%f0
+/* 0x0240 */ ldd [%i3+48],%f2
+ .L900000639:
+/* 0x0244 */ add %o2,16,%l6
+/* 0x0248 252 */ ldd [%i5+%o0],%f22
+/* 0x024c */ add %l5,3,%l5
+/* 0x0250 */ fmuld %f8,%f20,%f8
+/* 0x0254 250 */ add %o0,8,%o0
+/* 0x0258 252 */ ldd [%l6+%i3],%f26
+/* 0x025c */ cmp %l5,%o3
+/* 0x0260 */ ldd [%i4+%o0],%f24
+/* 0x0264 */ faddd %f0,%f4,%f0
+/* 0x0268 */ add %g2,6,%g2
+/* 0x026c */ faddd %f10,%f8,%f10
+/* 0x0270 */ fmuld %f14,%f18,%f4
+/* 0x0274 */ std %f0,[%o1+%i3]
+/* 0x0278 250 */ add %o2,32,%o1
+/* 0x027c 252 */ ldd [%i5+%o0],%f8
+/* 0x0280 */ fmuld %f6,%f20,%f6
+/* 0x0284 250 */ add %o0,8,%o0
+/* 0x0288 252 */ ldd [%o1+%i3],%f0
+/* 0x028c */ ldd [%i4+%o0],%f14
+/* 0x0290 */ faddd %f2,%f10,%f2
+/* 0x0294 */ faddd %f4,%f6,%f10
+/* 0x0298 */ fmuld %f12,%f18,%f4
+/* 0x029c */ std %f2,[%o2+%i3]
+/* 0x02a0 250 */ add %o2,48,%o2
+/* 0x02a4 252 */ ldd [%i5+%o0],%f6
+/* 0x02a8 */ fmuld %f22,%f20,%f22
+/* 0x02ac 250 */ add %o0,8,%o0
+/* 0x02b0 252 */ ldd [%o2+%i3],%f2
+/* 0x02b4 */ ldd [%i4+%o0],%f12
+/* 0x02b8 */ faddd %f26,%f10,%f10
+/* 0x02bc */ std %f10,[%l6+%i3]
+/* 0x02c0 */ fmuld %f24,%f18,%f10
+/* 0x02c4 */ ble,pt %icc,.L900000639
+/* 0x02c8 */ faddd %f4,%f22,%f4
+ .L900000642:
+/* 0x02cc 252 */ fmuld %f8,%f20,%f24
+/* 0x02d0 */ faddd %f0,%f4,%f8
+/* 0x02d4 250 */ add %o2,16,%o3
+/* 0x02d8 252 */ ldd [%o3+%i3],%f4
+/* 0x02dc */ fmuld %f14,%f18,%f0
+/* 0x02e0 */ cmp %l5,%i1
+/* 0x02e4 */ std %f8,[%o1+%i3]
+/* 0x02e8 */ fmuld %f12,%f18,%f8
+/* 0x02ec 250 */ add %o2,32,%o1
+/* 0x02f0 252 */ faddd %f10,%f24,%f12
+/* 0x02f4 */ ldd [%i5+%o0],%f22
+/* 0x02f8 */ fmuld %f6,%f20,%f6
+/* 0x02fc */ add %g2,8,%g2
+/* 0x0300 */ fmuld %f22,%f20,%f10
+/* 0x0304 */ faddd %f2,%f12,%f2
+/* 0x0308 */ faddd %f0,%f6,%f6
+/* 0x030c */ ldd [%o1+%i3],%f0
+/* 0x0310 */ std %f2,[%o2+%i3]
+/* 0x0314 */ faddd %f8,%f10,%f2
+/* 0x0318 */ sra %l5,0,%o2
+/* 0x031c */ sllx %o2,3,%o0
+/* 0x0320 */ faddd %f4,%f6,%f4
+/* 0x0324 */ std %f4,[%o3+%i3]
+/* 0x0328 */ faddd %f0,%f2,%f0
+/* 0x032c */ std %f0,[%o1+%i3]
+/* 0x0330 */ bg,a,pn %icc,.L77000213
+/* 0x0334 */ srl %i2,31,%o3
+ .L77000284:
+/* 0x0338 252 */ ldd [%i4+%o0],%f2
+ .L900000655:
+/* 0x033c 252 */ ldd [%i5+%o0],%f0
+/* 0x0340 */ fmuld %f2,%f18,%f2
+/* 0x0344 */ sra %g2,0,%o0
+/* 0x0348 */ sllx %o0,3,%o1
+/* 0x034c */ add %l5,1,%l5
+/* 0x0350 */ fmuld %f0,%f20,%f4
+/* 0x0354 */ ldd [%o1+%i3],%f0
+/* 0x0358 */ sra %l5,0,%o2
+/* 0x035c */ sllx %o2,3,%o0
+/* 0x0360 */ add %g2,2,%g2
+/* 0x0364 */ cmp %l5,%i1
+/* 0x0368 */ faddd %f2,%f4,%f2
+/* 0x036c */ faddd %f0,%f2,%f0
+/* 0x0370 */ std %f0,[%o1+%i3]
+/* 0x0374 */ ble,a,pt %icc,.L900000655
+/* 0x0378 */ ldd [%i4+%o0],%f2
+ .L900000626:
+/* 0x037c */ srl %i2,31,%o3
+/* 0x0380 252 */ ba .L900000654
+/* 0x0384 */ cmp %g3,30
+ .L77000213:
+/* 0x0388 254 */ cmp %g3,30
+ .L900000654:
+/* 0x038c */ add %i2,%o3,%o0
+/* 0x0390 254 */ bne,a,pt %icc,.L900000653
+/* 0x0394 */ fdtox %f16,%f0
+/* 0x0398 281 */ sra %o0,1,%g2
+/* 0x039c */ add %g2,1,%g2
+/* 0x03a0 */ ldd [%o7],%f0
+/* 0x03a4 */ sll %g2,1,%o1
+/* 0x03a8 */ sll %g1,1,%g2
+/* 0x03ac */ or %g0,%o1,%o2
+/* 0x03b0 */ fmovd %f0,%f2
+/* 0x03b4 */ or %g0,%g2,%o0
+/* 0x03b8 */ cmp %o1,%o0
+/* 0x03bc */ sub %g2,1,%o0
+/* 0x03c0 */ bge,pt %icc,.L77000215
+/* 0x03c4 */ or %g0,0,%g3
+/* 0x03c8 254 */ add %o1,1,%o1
+/* 0x03cc 281 */ sra %o2,0,%g2
+ .L900000652:
+/* 0x03d0 */ sllx %g2,3,%g2
+/* 0x03d4 */ ldd [%o7],%f6
+/* 0x03d8 */ add %o2,2,%o2
+/* 0x03dc */ sra %o1,0,%g3
+/* 0x03e0 */ ldd [%g2+%l4],%f8
+/* 0x03e4 */ cmp %o2,%o0
+/* 0x03e8 */ sllx %g3,3,%g3
+/* 0x03ec */ add %o1,2,%o1
+/* 0x03f0 */ ldd [%l4+%g3],%f10
+/* 0x03f4 */ fdtox %f8,%f12
+/* 0x03f8 */ fdtox %f10,%f4
+/* 0x03fc */ fmovd %f12,%f8
+/* 0x0400 */ fmovs %f6,%f12
+/* 0x0404 */ fmovs %f6,%f4
+/* 0x0408 */ fxtod %f12,%f6
+/* 0x040c */ fxtod %f4,%f12
+/* 0x0410 */ fdtox %f10,%f4
+/* 0x0414 */ faddd %f6,%f2,%f6
+/* 0x0418 */ std %f6,[%g2+%l4]
+/* 0x041c */ faddd %f12,%f0,%f6
+/* 0x0420 */ std %f6,[%l4+%g3]
+/* 0x0424 */ fitod %f8,%f2
+/* 0x0428 */ fitod %f4,%f0
+/* 0x042c */ ble,pt %icc,.L900000652
+/* 0x0430 */ sra %o2,0,%g2
+ .L77000233:
+/* 0x0434 */ or %g0,0,%g3
+ .L77000215:
+/* 0x0438 */ fdtox %f16,%f0
+ .L900000653:
+/* 0x043c 256 */ ldd [%o7],%f6
+/* 0x0440 */ add %g4,1,%g4
+/* 0x0444 */ or %g0,%i0,%i2
+/* 0x0448 */ ldd [%o5],%f8
+/* 0x044c */ add %g3,1,%g3
+/* 0x0450 */ add %i3,8,%i3
+/* 0x0454 */ fmovs %f6,%f0
+/* 0x0458 */ ldd [%o4],%f10
+/* 0x045c */ ld [%sp+2223],%o0
+/* 0x0460 */ fxtod %f0,%f6
+/* 0x0464 */ cmp %i0,%o0
+/* 0x0468 */ fmuld %f6,%f30,%f6
+/* 0x046c */ fmuld %f6,%f8,%f8
+/* 0x0470 */ fdtox %f8,%f8
+/* 0x0474 */ fxtod %f8,%f8
+/* 0x0478 */ fmuld %f8,%f10,%f8
+/* 0x047c */ fsubd %f6,%f8,%f20
+/* 0x0480 */ ble,a,pt %icc,.L900000651
+/* 0x0484 */ ldd [%i5],%f6
+ .L900000625:
+/* 0x0488 256 */ ba .L900000650
+/* 0x048c */ sllx %g5,3,%g2
+ .L77000279:
+/* 0x0490 261 */ ldd [%i1],%f4
+/* 0x0494 */ ldd [%i2],%f6
+/* 0x0498 273 */ std %f0,[%i3+8]
+/* 0x049c */ std %f0,[%i3+16]
+/* 0x04a0 261 */ fmuld %f4,%f6,%f6
+/* 0x04a4 */ std %f6,[%i3]
+/* 0x04a8 273 */ std %f0,[%i3+24]
+/* 0x04ac */ std %f0,[%i3+32]
+/* 0x04b0 */ fdtox %f6,%f2
+/* 0x04b4 */ std %f0,[%i3+40]
+/* 0x04b8 */ std %f0,[%i3+48]
+/* 0x04bc */ std %f0,[%i3+56]
+/* 0x04c0 */ std %f0,[%i3+64]
+/* 0x04c4 */ fmovs %f0,%f2
+/* 0x04c8 */ std %f0,[%i3+72]
+/* 0x04cc */ std %f0,[%i3+80]
+/* 0x04d0 */ std %f0,[%i3+88]
+/* 0x04d4 */ std %f0,[%i3+96]
+/* 0x04d8 */ std %f0,[%i3+104]
+/* 0x04dc */ std %f0,[%i3+112]
+/* 0x04e0 */ std %f0,[%i3+120]
+/* 0x04e4 */ std %f0,[%i3+128]
+/* 0x04e8 */ std %f0,[%i3+136]
+/* 0x04ec */ std %f0,[%i3+144]
+/* 0x04f0 */ std %f0,[%i3+152]
+/* 0x04f4 */ std %f0,[%i3+160]
+/* 0x04f8 */ std %f0,[%i3+168]
+/* 0x04fc */ fxtod %f2,%f6
+/* 0x0500 */ std %f0,[%i3+176]
+/* 0x0504 281 */ or %g0,1,%o2
+/* 0x0508 273 */ std %f0,[%i3+184]
+
+! 282 ! {
+! 284 ! m2j=pdm2[j];
+! 285 ! a=pdtj[0]+pdn_0*digit;
+! 286 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+/* 0x050c 286 */ sra %o2,0,%g2
+/* 0x0510 279 */ or %g0,%i3,%o3
+/* 0x0514 273 */ std %f0,[%i3+192]
+/* 0x0518 278 */ fmuld %f6,%f14,%f6
+/* 0x051c 281 */ or %g0,0,%g1
+/* 0x0520 273 */ std %f0,[%i3+200]
+/* 0x0524 */ std %f0,[%i3+208]
+/* 0x0528 */ std %f0,[%i3+216]
+/* 0x052c */ std %f0,[%i3+224]
+/* 0x0530 */ std %f0,[%i3+232]
+/* 0x0534 */ std %f0,[%i3+240]
+/* 0x0538 */ std %f0,[%i3+248]
+/* 0x053c */ std %f0,[%i3+256]
+/* 0x0540 */ std %f0,[%i3+264]
+/* 0x0544 */ std %f0,[%i3+272]
+/* 0x0548 */ std %f0,[%i3+280]
+/* 0x054c */ std %f0,[%i3+288]
+/* 0x0550 */ std %f0,[%i3+296]
+/* 0x0554 */ std %f0,[%i3+304]
+/* 0x0558 */ std %f0,[%i3+312]
+/* 0x055c */ std %f0,[%i3+320]
+/* 0x0560 */ std %f0,[%i3+328]
+/* 0x0564 */ std %f0,[%i3+336]
+/* 0x0568 */ std %f0,[%i3+344]
+/* 0x056c */ std %f0,[%i3+352]
+/* 0x0570 */ std %f0,[%i3+360]
+/* 0x0574 */ std %f0,[%i3+368]
+/* 0x0578 */ std %f0,[%i3+376]
+/* 0x057c */ std %f0,[%i3+384]
+/* 0x0580 */ std %f0,[%i3+392]
+/* 0x0584 */ std %f0,[%i3+400]
+/* 0x0588 */ std %f0,[%i3+408]
+/* 0x058c */ std %f0,[%i3+416]
+/* 0x0590 */ std %f0,[%i3+424]
+/* 0x0594 */ std %f0,[%i3+432]
+/* 0x0598 */ std %f0,[%i3+440]
+/* 0x059c */ std %f0,[%i3+448]
+/* 0x05a0 */ std %f0,[%i3+456]
+/* 0x05a4 */ std %f0,[%i3+464]
+/* 0x05a8 */ std %f0,[%i3+472]
+/* 0x05ac */ std %f0,[%i3+480]
+/* 0x05b0 */ std %f0,[%i3+488]
+/* 0x05b4 */ std %f0,[%i3+496]
+/* 0x05b8 278 */ ldd [%o5],%f8
+/* 0x05bc */ ldd [%o4],%f10
+/* 0x05c0 */ fmuld %f6,%f8,%f8
+/* 0x05c4 273 */ std %f0,[%i3+504]
+/* 0x05c8 */ std %f0,[%i3+512]
+/* 0x05cc */ std %f0,[%i3+520]
+/* 0x05d0 */ fdtox %f8,%f8
+/* 0x05d4 275 */ ldd [%o0],%f0
+/* 0x05d8 */ fxtod %f8,%f8
+/* 0x05dc */ fmuld %f8,%f10,%f8
+/* 0x05e0 */ fsubd %f6,%f8,%f2
+
+! 287 ! pdtj[1]=b;
+! 289 ! /**** this loop will be fully unrolled:
+! 290 ! for(i=1;i<16;i++)
+! 291 ! {
+! 292 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 293 ! }
+! 294 ! *************************************/
+! 295 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 296 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 297 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 298 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 299 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 300 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 301 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 302 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 303 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 304 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 305 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 306 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 307 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 308 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 309 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 310 ! /* no need for cleanup, cannot overflow */
+! 311 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+
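+! The hand-scheduled block below implements the j-loop quoted above.  Pieced
+! together from those source quotes, one iteration computes the following
+! (a sketch only; the scheduled code interleaves the loads, multiplies and
+! adds of neighbouring iterations):
+!
+!     for (j = 0; j < 32; j++, pdtj++) {
+!         m2j = pdm2[j];
+!         a = pdtj[0] + pdn_0 * digit;
+!         b = pdtj[1] + pdm1_0 * pdm2[j+1] + a * TwoToMinus16;
+!         pdtj[1] = b;
+!         for (i = 1; i < 16; i++)            /* fully unrolled below */
+!             pdtj[2*i] += pdm1[i] * m2j + pdn[i] * digit;
+!         digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+!     }
+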
+ fmovd %f2,%f0 ! hand modified
+ fmovd %f30,%f18 ! hand modified
+ ldd [%o0],%f2
+ ldd [%o3],%f8
+ ldd [%i1],%f10
+ ldd [%o5],%f14 ! hand modified
+ ldd [%o4],%f16 ! hand modified
+ ldd [%i2],%f24
+
+ ldd [%i1+8],%f26
+ ldd [%i1+16],%f40
+ ldd [%i1+48],%f46
+ ldd [%i1+56],%f30
+ ldd [%i1+64],%f54
+ ldd [%i1+104],%f34
+ ldd [%i1+112],%f58
+
+ ldd [%o0+8],%f28
+ ldd [%o0+104],%f38
+ ldd [%o0+112],%f60
+
+ .L99999999: !1
+ ldd [%i1+24],%f32
+ fmuld %f0,%f2,%f4 !2
+ ldd [%o0+24],%f36
+ fmuld %f26,%f24,%f20 !3
+ ldd [%i1+40],%f42
+ fmuld %f28,%f0,%f22 !4
+ ldd [%o0+40],%f44
+ fmuld %f32,%f24,%f32 !5
+ ldd [%i2+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36 !6
+ add %i2,8,%i2
+ ldd [%o0+56],%f50
+ fmuld %f42,%f24,%f42 !7
+ ldd [%i1+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44 !8
+ ldd [%o3+16],%f22
+ fmuld %f10,%f6,%f12 !9
+ ldd [%o0+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4 !10
+ ldd [%o3+48],%f36
+ fmuld %f30,%f24,%f48 !11
+ ldd [%o3+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !12
+ std %f20,[%o3+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52 !13
+ ldd [%o3+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56 !14
+ ldd [%i1+88],%f20
+ faddd %f32,%f36,%f32 !15
+ ldd [%o0+88],%f22
+ faddd %f48,%f50,%f48 !16
+ ldd [%o3+112],%f50
+ faddd %f52,%f56,%f52 !17
+ ldd [%o3+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20 !18
+ std %f32,[%o3+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22 !19
+ std %f42,[%o3+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32 !20
+ std %f48,[%o3+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36 !21
+ ldd [%i1+120],%f42
+ fdtox %f8,%f4 !22
+ std %f52,[%o3+144]
+ faddd %f20,%f22,%f20 !23
+ ldd [%o0+120],%f44 !24
+ ldd [%o3+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42 !25
+ ldd [%o0+16],%f50
+ fmovs %f17,%f4 !26
+ ldd [%i1+32],%f52
+ fmuld %f44,%f0,%f44 !27
+ ldd [%o0+32],%f56
+ fmuld %f40,%f24,%f48 !28
+ ldd [%o3+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !29
+ std %f20,[%o3+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52 !30
+ ldd [%o0+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56 !31
+ ldd [%o3+240],%f44
+ faddd %f32,%f36,%f32 !32
+ std %f32,[%o3+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20 !33
+ ldd [%o3+32],%f50
+ fmuld %f4,%f18,%f12 !34
+ ldd [%o0+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22 !35
+ ldd [%o3+64],%f56
+ faddd %f42,%f44,%f42 !36
+ std %f42,[%o3+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32 !37
+ std %f48,[%o3+32]
+ fmuld %f12,%f14,%f4 !38
+ ldd [%i1+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36 !39
+ ldd [%o0+80],%f44
+ faddd %f20,%f22,%f20 !40
+ ldd [%i1+96],%f48
+ fmuld %f58,%f24,%f52 !41
+ ldd [%o0+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42 !42
+ std %f56,[%o3+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44 !43
+ ldd [%o3+96],%f22
+ fmuld %f48,%f24,%f48 !44
+ ldd [%o3+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50 !45
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56 !46
+ add %o3,8,%o3
+ faddd %f42,%f44,%f42 !47
+ ldd [%o3+160-8],%f44
+ faddd %f20,%f22,%f20 !48
+ std %f20,[%o3+96-8]
+ faddd %f48,%f50,%f48 !49
+ ldd [%o3+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4 !50
+ ldd [%o3+224-8],%f56
+ faddd %f32,%f36,%f32 !51
+ std %f32,[%o3+128-8]
+ faddd %f42,%f44,%f42 !52
+ add %g1,1,%g1
+ std %f42,[%o3+160-8]
+ faddd %f48,%f50,%f48 !53
+ cmp %g1,31
+ std %f48,[%o3+192-8]
+ fsubd %f12,%f4,%f0 !54
+ faddd %f52,%f56,%f52
+ ble,pt %icc,.L99999999
+ std %f52,[%o3+224-8] !55
+ std %f8,[%o3]
+! 312 ! }
+! 313 ! }
+! 315 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
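+! In outline, conv_d16_to_i32 converts the floating-point temporary (one
+! 16-bit digit per double) back into ordinary 32-bit integer digits: each
+! double is truncated with fdtox, adjacent 16-bit digits are recombined by
+! the sllx/srax/and sequences below, and the overflow above 32 bits is
+! carried into the next digit.  The C original lives in montmulf.c.
+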
+/* 0x0844 315 */ sllx %g5,3,%g2
+ .L900000650:
+/* 0x0848 315 */ ldd [%g2+%l4],%f2
+/* 0x084c */ add %l4,%g2,%o0
+/* 0x0850 */ or %g0,0,%g1
+/* 0x0854 */ ldd [%o0+8],%f4
+/* 0x0858 */ or %g0,0,%i2
+/* 0x085c */ cmp %l0,0
+/* 0x0860 */ fdtox %f2,%f2
+/* 0x0864 */ std %f2,[%sp+2255]
+/* 0x0868 311 */ sethi %hi(0xfc00),%o3
+/* 0x086c 315 */ fdtox %f4,%f2
+/* 0x0870 */ std %f2,[%sp+2247]
+/* 0x0874 311 */ or %g0,-1,%o2
+/* 0x0878 */ srl %o2,0,%o5
+/* 0x087c */ or %g0,2,%g5
+/* 0x0880 */ sub %l0,1,%g3
+/* 0x0884 */ or %g0,%o0,%o7
+/* 0x0888 */ add %o3,1023,%o4
+/* 0x088c 315 */ or %g0,64,%o3
+/* 0x0890 */ ldx [%sp+2255],%i0
+/* 0x0894 */ sub %l0,2,%o1
+/* 0x0898 */ ldx [%sp+2247],%i1
+/* 0x089c */ ble,pt %icc,.L900000648
+/* 0x08a0 */ sethi %hi(0xfc00),%g2
+/* 0x08a4 */ cmp %l0,6
+/* 0x08a8 */ and %i0,%o5,%o2
+/* 0x08ac */ bl,pn %icc,.L77000287
+/* 0x08b0 */ or %g0,3,%g4
+/* 0x08b4 */ ldd [%o7+16],%f0
+/* 0x08b8 */ and %i1,%o4,%i3
+/* 0x08bc */ sllx %i3,16,%o0
+/* 0x08c0 */ or %g0,5,%g4
+/* 0x08c4 */ srax %i1,16,%i4
+/* 0x08c8 */ fdtox %f0,%f0
+/* 0x08cc */ std %f0,[%sp+2239]
+/* 0x08d0 */ srax %i0,32,%i1
+/* 0x08d4 */ add %o2,%o0,%i5
+/* 0x08d8 */ ldd [%o7+24],%f0
+/* 0x08dc */ and %i5,%o5,%l1
+/* 0x08e0 */ or %g0,72,%o2
+/* 0x08e4 */ or %g0,4,%o0
+/* 0x08e8 */ or %g0,4,%g5
+/* 0x08ec */ ldx [%sp+2239],%g1
+/* 0x08f0 */ fdtox %f0,%f0
+/* 0x08f4 */ or %g0,4,%i2
+/* 0x08f8 */ std %f0,[%sp+2231]
+/* 0x08fc */ ldd [%o7+40],%f2
+/* 0x0900 */ and %g1,%o5,%i3
+/* 0x0904 */ ldd [%o7+32],%f0
+/* 0x0908 */ srax %g1,32,%g1
+/* 0x090c */ ldd [%o7+56],%f4
+/* 0x0910 */ fdtox %f2,%f2
+/* 0x0914 */ ldx [%sp+2231],%g2
+/* 0x0918 */ fdtox %f0,%f0
+/* 0x091c */ st %l1,[%l2]
+/* 0x0920 */ srax %i5,32,%l1
+/* 0x0924 */ fdtox %f4,%f4
+/* 0x0928 */ std %f2,[%sp+2231]
+/* 0x092c */ and %g2,%o4,%i5
+/* 0x0930 */ add %i4,%l1,%i4
+/* 0x0934 */ std %f0,[%sp+2239]
+/* 0x0938 */ sllx %i5,16,%i0
+/* 0x093c */ add %i1,%i4,%i1
+/* 0x0940 */ ldd [%o7+48],%f2
+/* 0x0944 */ srax %g2,16,%g2
+/* 0x0948 */ add %i3,%i0,%i0
+/* 0x094c */ ldd [%o7+72],%f0
+/* 0x0950 */ add %i0,%i1,%i3
+/* 0x0954 */ srax %i3,32,%i4
+/* 0x0958 */ fdtox %f2,%f2
+/* 0x095c */ and %i3,%o5,%i3
+/* 0x0960 */ ldx [%sp+2231],%i1
+/* 0x0964 */ add %g2,%i4,%g2
+/* 0x0968 */ ldx [%sp+2239],%i0
+/* 0x096c */ add %g1,%g2,%g1
+/* 0x0970 */ std %f2,[%sp+2239]
+/* 0x0974 */ std %f4,[%sp+2231]
+/* 0x0978 */ ldd [%o7+64],%f2
+/* 0x097c */ st %i3,[%l2+4]
+ .L900000631:
+/* 0x0980 */ ldx [%sp+2231],%i3
+/* 0x0984 */ add %i2,2,%i2
+/* 0x0988 */ add %g4,4,%g4
+/* 0x098c */ ldx [%sp+2239],%i5
+/* 0x0990 */ add %o2,16,%o2
+/* 0x0994 */ and %i1,%o4,%g2
+/* 0x0998 */ sllx %g2,16,%i4
+/* 0x099c */ and %i0,%o5,%g2
+/* 0x09a0 */ ldd [%o7+%o2],%f4
+/* 0x09a4 */ fdtox %f0,%f0
+/* 0x09a8 */ std %f0,[%sp+2231]
+/* 0x09ac */ srax %i1,16,%i1
+/* 0x09b0 */ add %g2,%i4,%g2
+/* 0x09b4 */ fdtox %f2,%f0
+/* 0x09b8 */ add %o3,16,%o3
+/* 0x09bc */ std %f0,[%sp+2239]
+/* 0x09c0 */ add %g2,%g1,%g1
+/* 0x09c4 */ ldd [%o7+%o3],%f2
+/* 0x09c8 */ srax %g1,32,%i4
+/* 0x09cc */ cmp %i2,%o1
+/* 0x09d0 */ srax %i0,32,%g2
+/* 0x09d4 */ add %i1,%i4,%i0
+/* 0x09d8 */ add %g2,%i0,%i4
+/* 0x09dc */ add %o0,4,%o0
+/* 0x09e0 */ and %g1,%o5,%g2
+/* 0x09e4 */ or %g0,%i5,%g1
+/* 0x09e8 */ st %g2,[%l2+%o0]
+/* 0x09ec */ add %g5,4,%g5
+/* 0x09f0 */ ldx [%sp+2231],%i1
+/* 0x09f4 */ ldx [%sp+2239],%i0
+/* 0x09f8 */ add %o2,16,%o2
+/* 0x09fc */ and %i3,%o4,%g2
+/* 0x0a00 */ sllx %g2,16,%i5
+/* 0x0a04 */ and %g1,%o5,%g2
+/* 0x0a08 */ ldd [%o7+%o2],%f0
+/* 0x0a0c */ fdtox %f4,%f4
+/* 0x0a10 */ std %f4,[%sp+2231]
+/* 0x0a14 */ srax %i3,16,%i3
+/* 0x0a18 */ add %g2,%i5,%g2
+/* 0x0a1c */ fdtox %f2,%f2
+/* 0x0a20 */ add %o3,16,%o3
+/* 0x0a24 */ std %f2,[%sp+2239]
+/* 0x0a28 */ add %g2,%i4,%g2
+/* 0x0a2c */ ldd [%o7+%o3],%f2
+/* 0x0a30 */ srax %g2,32,%i4
+/* 0x0a34 */ srax %g1,32,%g1
+/* 0x0a38 */ add %i3,%i4,%i3
+/* 0x0a3c */ add %g1,%i3,%g1
+/* 0x0a40 */ add %o0,4,%o0
+/* 0x0a44 */ and %g2,%o5,%g2
+/* 0x0a48 */ ble,pt %icc,.L900000631
+/* 0x0a4c */ st %g2,[%l2+%o0]
+ .L900000634:
+/* 0x0a50 */ srax %i1,16,%i5
+/* 0x0a54 */ ldx [%sp+2231],%o1
+/* 0x0a58 */ and %i1,%o4,%i3
+/* 0x0a5c */ sllx %i3,16,%i3
+/* 0x0a60 */ ldx [%sp+2239],%i4
+/* 0x0a64 */ and %i0,%o5,%g2
+/* 0x0a68 */ add %g2,%i3,%g2
+/* 0x0a6c */ and %o1,%o4,%i3
+/* 0x0a70 */ fdtox %f0,%f4
+/* 0x0a74 */ sllx %i3,16,%i3
+/* 0x0a78 */ std %f4,[%sp+2231]
+/* 0x0a7c */ add %g2,%g1,%g2
+/* 0x0a80 */ srax %g2,32,%l1
+/* 0x0a84 */ and %i4,%o5,%i1
+/* 0x0a88 */ fdtox %f2,%f0
+/* 0x0a8c */ srax %i0,32,%g1
+/* 0x0a90 */ std %f0,[%sp+2239]
+/* 0x0a94 */ add %i5,%l1,%i0
+/* 0x0a98 */ srax %o1,16,%o1
+/* 0x0a9c */ add %g1,%i0,%i0
+/* 0x0aa0 */ add %o0,4,%g1
+/* 0x0aa4 */ add %i1,%i3,%o0
+/* 0x0aa8 */ and %g2,%o5,%g2
+/* 0x0aac */ st %g2,[%l2+%g1]
+/* 0x0ab0 */ add %o0,%i0,%o0
+/* 0x0ab4 */ srax %o0,32,%i3
+/* 0x0ab8 */ ldx [%sp+2231],%i1
+/* 0x0abc */ add %g1,4,%g1
+/* 0x0ac0 */ ldx [%sp+2239],%i0
+/* 0x0ac4 */ and %o0,%o5,%g2
+/* 0x0ac8 */ add %o1,%i3,%o1
+/* 0x0acc */ srax %i4,32,%o0
+/* 0x0ad0 */ cmp %i2,%g3
+/* 0x0ad4 */ st %g2,[%l2+%g1]
+/* 0x0ad8 */ bg,pn %icc,.L77000236
+/* 0x0adc */ add %o0,%o1,%g1
+/* 0x0ae0 */ add %g4,6,%g4
+/* 0x0ae4 */ add %g5,6,%g5
+ .L77000287:
+/* 0x0ae8 */ sra %g5,0,%o1
+ .L900000647:
+/* 0x0aec */ sllx %o1,3,%o2
+/* 0x0af0 */ and %i0,%o5,%o0
+/* 0x0af4 */ ldd [%o7+%o2],%f0
+/* 0x0af8 */ sra %g4,0,%o2
+/* 0x0afc */ and %i1,%o4,%o1
+/* 0x0b00 */ sllx %o2,3,%o2
+/* 0x0b04 */ add %g1,%o0,%o0
+/* 0x0b08 */ fdtox %f0,%f0
+/* 0x0b0c */ std %f0,[%sp+2239]
+/* 0x0b10 */ sllx %o1,16,%o1
+/* 0x0b14 */ add %o0,%o1,%o1
+/* 0x0b18 */ add %g5,2,%g5
+/* 0x0b1c */ ldd [%o7+%o2],%f0
+/* 0x0b20 */ srax %o1,32,%g1
+/* 0x0b24 */ and %o1,%o5,%o2
+/* 0x0b28 */ srax %i1,16,%o0
+/* 0x0b2c */ add %g4,2,%g4
+/* 0x0b30 */ fdtox %f0,%f0
+/* 0x0b34 */ std %f0,[%sp+2231]
+/* 0x0b38 */ sra %i2,0,%o1
+/* 0x0b3c */ sllx %o1,2,%o1
+/* 0x0b40 */ add %o0,%g1,%g2
+/* 0x0b44 */ srax %i0,32,%g1
+/* 0x0b48 */ add %i2,1,%i2
+/* 0x0b4c */ add %g1,%g2,%g1
+/* 0x0b50 */ cmp %i2,%g3
+/* 0x0b54 */ ldx [%sp+2239],%o3
+/* 0x0b58 */ ldx [%sp+2231],%i1
+/* 0x0b5c */ st %o2,[%l2+%o1]
+/* 0x0b60 */ or %g0,%o3,%i0
+/* 0x0b64 */ ble,pt %icc,.L900000647
+/* 0x0b68 */ sra %g5,0,%o1
+ .L77000236:
+/* 0x0b6c */ sethi %hi(0xfc00),%g2
+ .L900000648:
+/* 0x0b70 */ or %g0,-1,%o0
+/* 0x0b74 */ add %g2,1023,%g2
+/* 0x0b78 */ srl %o0,0,%g3
+/* 0x0b7c */ and %i1,%g2,%g2
+/* 0x0b80 */ and %i0,%g3,%g4
+/* 0x0b84 */ sllx %g2,16,%g2
+/* 0x0b88 */ add %g1,%g4,%g4
+/* 0x0b8c */ sra %i2,0,%g5
+/* 0x0b90 */ add %g4,%g2,%g4
+/* 0x0b94 */ sllx %g5,2,%g2
+/* 0x0b98 */ and %g4,%g3,%g3
+/* 0x0b9c */ st %g3,[%l2+%g2]
+
+! 317 ! adjust_montf_result(result,nint,nlen);
+
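+! adjust_montf_result performs the final conditional subtraction of the
+! Montgomery step: if the accumulated result is not below the modulus nint,
+! the modulus is subtracted once (the borrow-propagating loops below) so the
+! returned value lies in [0, n).
+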
+/* 0x0ba0 317 */ sra %l0,0,%g4
+/* 0x0ba4 */ sllx %g4,2,%g2
+/* 0x0ba8 */ ld [%l2+%g2],%g2
+/* 0x0bac */ cmp %g2,0
+/* 0x0bb0 */ bleu,pn %icc,.L77000241
+/* 0x0bb4 */ or %g0,-1,%o1
+/* 0x0bb8 */ ba .L900000646
+/* 0x0bbc */ cmp %o1,0
+ .L77000241:
+/* 0x0bc0 */ sub %l0,1,%o1
+/* 0x0bc4 */ cmp %o1,0
+/* 0x0bc8 */ bl,pn %icc,.L77000244
+/* 0x0bcc */ sra %o1,0,%g2
+ .L900000645:
+/* 0x0bd0 */ sllx %g2,2,%g2
+/* 0x0bd4 */ sub %o1,1,%o0
+/* 0x0bd8 */ ld [%l3+%g2],%g3
+/* 0x0bdc */ ld [%l2+%g2],%g2
+/* 0x0be0 */ cmp %g2,%g3
+/* 0x0be4 */ bne,pn %icc,.L77000244
+/* 0x0be8 */ nop
+/* 0x0bec 0 */ or %g0,%o0,%o1
+/* 0x0bf0 317 */ cmp %o0,0
+/* 0x0bf4 */ bge,pt %icc,.L900000645
+/* 0x0bf8 */ sra %o1,0,%g2
+ .L77000244:
+/* 0x0bfc */ cmp %o1,0
+ .L900000646:
+/* 0x0c00 */ bl,pn %icc,.L77000288
+/* 0x0c04 */ sra %o1,0,%g2
+/* 0x0c08 */ sllx %g2,2,%g2
+/* 0x0c0c */ ld [%l3+%g2],%g3
+/* 0x0c10 */ ld [%l2+%g2],%g2
+/* 0x0c14 */ cmp %g2,%g3
+/* 0x0c18 */ bleu,pt %icc,.L77000224
+/* 0x0c1c */ nop
+ .L77000288:
+/* 0x0c20 */ cmp %l0,0
+/* 0x0c24 */ ble,pt %icc,.L77000224
+/* 0x0c28 */ nop
+/* 0x0c2c 317 */ or %g0,-1,%g2
+/* 0x0c30 315 */ or %g0,0,%i0
+/* 0x0c34 317 */ srl %g2,0,%g2
+/* 0x0c38 315 */ or %g0,0,%g4
+/* 0x0c3c */ or %g0,0,%o1
+/* 0x0c40 317 */ sub %l0,1,%g5
+/* 0x0c44 */ cmp %l0,9
+/* 0x0c48 315 */ or %g0,8,%o5
+/* 0x0c4c */ bl,pn %icc,.L77000289
+/* 0x0c50 */ sub %l0,4,%o7
+/* 0x0c54 */ ld [%l2],%o1
+/* 0x0c58 */ or %g0,5,%i0
+/* 0x0c5c */ ld [%l3],%o2
+/* 0x0c60 */ or %g0,12,%o4
+/* 0x0c64 */ or %g0,16,%g1
+/* 0x0c68 */ ld [%l3+4],%o3
+/* 0x0c6c */ ld [%l2+4],%o0
+/* 0x0c70 */ sub %o1,%o2,%o1
+/* 0x0c74 */ ld [%l3+8],%i1
+/* 0x0c78 */ and %o1,%g2,%g4
+/* 0x0c7c */ st %g4,[%l2]
+/* 0x0c80 */ srax %o1,32,%g4
+/* 0x0c84 */ sub %o0,%o3,%o0
+/* 0x0c88 */ ld [%l3+12],%o2
+/* 0x0c8c */ add %o0,%g4,%o0
+/* 0x0c90 */ and %o0,%g2,%g4
+/* 0x0c94 */ st %g4,[%l2+4]
+/* 0x0c98 */ srax %o0,32,%o0
+/* 0x0c9c */ ld [%l2+8],%o1
+/* 0x0ca0 */ ld [%l2+12],%o3
+/* 0x0ca4 */ sub %o1,%i1,%o1
+ .L900000635:
+/* 0x0ca8 */ add %g1,4,%g3
+/* 0x0cac */ ld [%g1+%l2],%g4
+/* 0x0cb0 */ add %o1,%o0,%o0
+/* 0x0cb4 */ ld [%l3+%g1],%i1
+/* 0x0cb8 */ sub %o3,%o2,%o1
+/* 0x0cbc */ and %o0,%g2,%o2
+/* 0x0cc0 */ st %o2,[%o5+%l2]
+/* 0x0cc4 */ srax %o0,32,%o2
+/* 0x0cc8 */ add %i0,4,%i0
+/* 0x0ccc */ add %g1,8,%o5
+/* 0x0cd0 */ ld [%g3+%l2],%o0
+/* 0x0cd4 */ add %o1,%o2,%o1
+/* 0x0cd8 */ ld [%l3+%g3],%o3
+/* 0x0cdc */ sub %g4,%i1,%o2
+/* 0x0ce0 */ and %o1,%g2,%g4
+/* 0x0ce4 */ st %g4,[%o4+%l2]
+/* 0x0ce8 */ srax %o1,32,%g4
+/* 0x0cec */ cmp %i0,%o7
+/* 0x0cf0 */ add %g1,12,%o4
+/* 0x0cf4 */ ld [%o5+%l2],%o1
+/* 0x0cf8 */ add %o2,%g4,%o2
+/* 0x0cfc */ ld [%l3+%o5],%i1
+/* 0x0d00 */ sub %o0,%o3,%o0
+/* 0x0d04 */ and %o2,%g2,%o3
+/* 0x0d08 */ st %o3,[%g1+%l2]
+/* 0x0d0c */ srax %o2,32,%g4
+/* 0x0d10 */ ld [%o4+%l2],%o3
+/* 0x0d14 */ add %g1,16,%g1
+/* 0x0d18 */ add %o0,%g4,%o0
+/* 0x0d1c */ ld [%l3+%o4],%o2
+/* 0x0d20 */ sub %o1,%i1,%o1
+/* 0x0d24 */ and %o0,%g2,%g4
+/* 0x0d28 */ st %g4,[%g3+%l2]
+/* 0x0d2c */ ble,pt %icc,.L900000635
+/* 0x0d30 */ srax %o0,32,%o0
+ .L900000638:
+/* 0x0d34 */ add %o1,%o0,%g3
+/* 0x0d38 */ sub %o3,%o2,%o1
+/* 0x0d3c */ ld [%g1+%l2],%o0
+/* 0x0d40 */ ld [%l3+%g1],%o2
+/* 0x0d44 */ srax %g3,32,%o7
+/* 0x0d48 */ and %g3,%g2,%o3
+/* 0x0d4c */ add %o1,%o7,%o1
+/* 0x0d50 */ st %o3,[%o5+%l2]
+/* 0x0d54 */ cmp %i0,%g5
+/* 0x0d58 */ sub %o0,%o2,%o0
+/* 0x0d5c */ and %o1,%g2,%o2
+/* 0x0d60 */ st %o2,[%o4+%l2]
+/* 0x0d64 */ srax %o1,32,%o1
+/* 0x0d68 */ sra %i0,0,%o2
+/* 0x0d6c */ add %o0,%o1,%o0
+/* 0x0d70 */ srax %o0,32,%g4
+/* 0x0d74 */ and %o0,%g2,%o1
+/* 0x0d78 */ st %o1,[%g1+%l2]
+/* 0x0d7c */ bg,pn %icc,.L77000224
+/* 0x0d80 */ sllx %o2,2,%o1
+ .L77000289:
+/* 0x0d84 0 */ or %g0,%o1,%g1
+ .L900000644:
+/* 0x0d88 */ ld [%o1+%l2],%o0
+/* 0x0d8c */ add %i0,1,%i0
+/* 0x0d90 */ ld [%l3+%o1],%o1
+/* 0x0d94 */ sra %i0,0,%o2
+/* 0x0d98 */ cmp %i0,%g5
+/* 0x0d9c */ add %g4,%o0,%o0
+/* 0x0da0 */ sub %o0,%o1,%o0
+/* 0x0da4 */ srax %o0,32,%g4
+/* 0x0da8 */ and %o0,%g2,%o1
+/* 0x0dac */ st %o1,[%g1+%l2]
+/* 0x0db0 */ sllx %o2,2,%o1
+/* 0x0db4 */ ble,pt %icc,.L900000644
+/* 0x0db8 */ or %g0,%o1,%g1
+ .L77000224:
+/* 0x0dbc */ ret ! Result =
+/* 0x0dc0 */ restore %g0,%g0,%g0
+/* 0x0dc4 0 */ .type mont_mulf_noconv,2
+/* 0x0dc4 */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/mp_comba.c b/security/nss/lib/freebl/mpi/mp_comba.c
new file mode 100644
index 0000000000..3b4937b98a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba.c
@@ -0,0 +1,3235 @@
+/*
+ * This file is derived from TFM v0.03.
+ * It contains code from fp_mul_comba.c and
+ * fp_sqr_comba.c, which carried the license below.
+ *
+ * Right now, the inline assembly in this file limits
+ * this code to AMD64.
+ *
+ * This file is public domain.
+ */
+
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+#include "mpi-priv.h"
+
+/* clamp digits: drop leading zero digits and give zero a positive sign */
+#define mp_clamp(a) \
+ { \
+ while ((a)->used && (a)->dp[(a)->used - 1] == 0) \
+ --((a)->used); \
+ (a)->sign = (a)->used ? (a)->sign : ZPOS; \
+ }
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+ do { \
+ c0 = c1; \
+ c1 = c2; \
+ c2 = 0; \
+ } while (0);
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+/* multiply i by j and add the 128-bit product into the carry chain c2:c1:c0 */
+#define MULADD(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
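+
+/* For reference, a portable-C sketch of the same accumulation (illustrative
+ * only; assumes a 64-bit mp_digit and a compiler providing the unsigned
+ * __int128 extension):
+ *
+ *     unsigned __int128 t = (unsigned __int128)i * j;
+ *     unsigned __int128 s = (unsigned __int128)c0 + (mp_digit)t;
+ *     c0 = (mp_digit)s;
+ *     s = (unsigned __int128)c1 + (mp_digit)(t >> 64) + (mp_digit)(s >> 64);
+ *     c1 = (mp_digit)s;
+ *     c2 += (mp_digit)(s >> 64);
+ */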
+
+/* sqr macros only */
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+#define COMBA_STORE(x) \
+ x = c0;
+
+#define COMBA_STORE2(x) \
+ x = c1;
+
+#define CARRY_FORWARD \
+ do { \
+ c0 = c1; \
+ c1 = c2; \
+ c2 = 0; \
+ } while (0);
+
+#define COMBA_FINI
+
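+/* add i*i into the running carry chain c2:c1:c0; the j argument is unused */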
+#define SQRADD(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %%rax \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i) \
+ : "%rax", "%rdx", "cc");
+
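+/* add the product i*j into the carry chain twice, for the doubled cross terms of a square */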
+#define SQRADD2(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
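+/* start a fresh sub-chain: sc2:sc1:sc0 = i*j */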
+#define SQRADDSC(i, j) \
+ __asm__( \
+ "movq %3,%%rax \n\t" \
+ "mulq %4 \n\t" \
+ "movq %%rax,%0 \n\t" \
+ "movq %%rdx,%1 \n\t" \
+ "xorq %2,%2 \n\t" \
+ : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+ : "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
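+/* accumulate another product i*j into the sub-chain sc2:sc1:sc0 */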
+#define SQRADDAC(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+ : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
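+/* fold the sub-chain into the main chain twice, doubling the cross terms */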
+#define SQRADDDB \
+ __asm__( \
+ "addq %6,%0 \n\t" \
+ "adcq %7,%1 \n\t" \
+ "adcq %8,%2 \n\t" \
+ "addq %6,%0 \n\t" \
+ "adcq %7,%1 \n\t" \
+ "adcq %8,%2 \n\t" \
+ : "=&r"(c0), "=&r"(c1), "=&r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \
+ : "cc");
+
+void
+s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[8];
+
+ memcpy(at, A->dp, 4 * sizeof(mp_digit));
+ memcpy(at + 4, B->dp, 4 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[4]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[5]);
+ MULADD(at[1], at[4]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[6]);
+ MULADD(at[1], at[5]);
+ MULADD(at[2], at[4]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[7]);
+ MULADD(at[1], at[6]);
+ MULADD(at[2], at[5]);
+ MULADD(at[3], at[4]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[7]);
+ MULADD(at[2], at[6]);
+ MULADD(at[3], at[5]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[7]);
+ MULADD(at[3], at[6]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[7]);
+ COMBA_STORE(C->dp[6]);
+ COMBA_STORE2(C->dp[7]);
+ C->used = 8;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
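+
+/* These fixed-size routines assume both inputs have at least the stated
+ * number of digits allocated and that C can hold twice as many; a
+ * hypothetical caller for the 4-digit case (a sketch only, assuming the
+ * s_mp_pad helper from mpi-priv.h; the real dispatch happens in the generic
+ * multiply path) might do:
+ *
+ *     mp_err res;
+ *     if ((res = s_mp_pad(C, 8)) != MP_OKAY)
+ *         return res;
+ *     s_mp_mul_comba_4(A, B, C);
+ *     return MP_OKAY;
+ */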
+
+void
+s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[16];
+
+ memcpy(at, A->dp, 8 * sizeof(mp_digit));
+ memcpy(at + 8, B->dp, 8 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[8]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[9]);
+ MULADD(at[1], at[8]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[10]);
+ MULADD(at[1], at[9]);
+ MULADD(at[2], at[8]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[11]);
+ MULADD(at[1], at[10]);
+ MULADD(at[2], at[9]);
+ MULADD(at[3], at[8]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[12]);
+ MULADD(at[1], at[11]);
+ MULADD(at[2], at[10]);
+ MULADD(at[3], at[9]);
+ MULADD(at[4], at[8]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[13]);
+ MULADD(at[1], at[12]);
+ MULADD(at[2], at[11]);
+ MULADD(at[3], at[10]);
+ MULADD(at[4], at[9]);
+ MULADD(at[5], at[8]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[14]);
+ MULADD(at[1], at[13]);
+ MULADD(at[2], at[12]);
+ MULADD(at[3], at[11]);
+ MULADD(at[4], at[10]);
+ MULADD(at[5], at[9]);
+ MULADD(at[6], at[8]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[15]);
+ MULADD(at[1], at[14]);
+ MULADD(at[2], at[13]);
+ MULADD(at[3], at[12]);
+ MULADD(at[4], at[11]);
+ MULADD(at[5], at[10]);
+ MULADD(at[6], at[9]);
+ MULADD(at[7], at[8]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[15]);
+ MULADD(at[2], at[14]);
+ MULADD(at[3], at[13]);
+ MULADD(at[4], at[12]);
+ MULADD(at[5], at[11]);
+ MULADD(at[6], at[10]);
+ MULADD(at[7], at[9]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[15]);
+ MULADD(at[3], at[14]);
+ MULADD(at[4], at[13]);
+ MULADD(at[5], at[12]);
+ MULADD(at[6], at[11]);
+ MULADD(at[7], at[10]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[15]);
+ MULADD(at[4], at[14]);
+ MULADD(at[5], at[13]);
+ MULADD(at[6], at[12]);
+ MULADD(at[7], at[11]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[15]);
+ MULADD(at[5], at[14]);
+ MULADD(at[6], at[13]);
+ MULADD(at[7], at[12]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[15]);
+ MULADD(at[6], at[14]);
+ MULADD(at[7], at[13]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[15]);
+ MULADD(at[7], at[14]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[15]);
+ COMBA_STORE(C->dp[14]);
+ COMBA_STORE2(C->dp[15]);
+ C->used = 16;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[32];
+
+ memcpy(at, A->dp, 16 * sizeof(mp_digit));
+ memcpy(at + 16, B->dp, 16 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[16]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[17]);
+ MULADD(at[1], at[16]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[18]);
+ MULADD(at[1], at[17]);
+ MULADD(at[2], at[16]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[19]);
+ MULADD(at[1], at[18]);
+ MULADD(at[2], at[17]);
+ MULADD(at[3], at[16]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[20]);
+ MULADD(at[1], at[19]);
+ MULADD(at[2], at[18]);
+ MULADD(at[3], at[17]);
+ MULADD(at[4], at[16]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[21]);
+ MULADD(at[1], at[20]);
+ MULADD(at[2], at[19]);
+ MULADD(at[3], at[18]);
+ MULADD(at[4], at[17]);
+ MULADD(at[5], at[16]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[22]);
+ MULADD(at[1], at[21]);
+ MULADD(at[2], at[20]);
+ MULADD(at[3], at[19]);
+ MULADD(at[4], at[18]);
+ MULADD(at[5], at[17]);
+ MULADD(at[6], at[16]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[23]);
+ MULADD(at[1], at[22]);
+ MULADD(at[2], at[21]);
+ MULADD(at[3], at[20]);
+ MULADD(at[4], at[19]);
+ MULADD(at[5], at[18]);
+ MULADD(at[6], at[17]);
+ MULADD(at[7], at[16]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[24]);
+ MULADD(at[1], at[23]);
+ MULADD(at[2], at[22]);
+ MULADD(at[3], at[21]);
+ MULADD(at[4], at[20]);
+ MULADD(at[5], at[19]);
+ MULADD(at[6], at[18]);
+ MULADD(at[7], at[17]);
+ MULADD(at[8], at[16]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[25]);
+ MULADD(at[1], at[24]);
+ MULADD(at[2], at[23]);
+ MULADD(at[3], at[22]);
+ MULADD(at[4], at[21]);
+ MULADD(at[5], at[20]);
+ MULADD(at[6], at[19]);
+ MULADD(at[7], at[18]);
+ MULADD(at[8], at[17]);
+ MULADD(at[9], at[16]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[26]);
+ MULADD(at[1], at[25]);
+ MULADD(at[2], at[24]);
+ MULADD(at[3], at[23]);
+ MULADD(at[4], at[22]);
+ MULADD(at[5], at[21]);
+ MULADD(at[6], at[20]);
+ MULADD(at[7], at[19]);
+ MULADD(at[8], at[18]);
+ MULADD(at[9], at[17]);
+ MULADD(at[10], at[16]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[27]);
+ MULADD(at[1], at[26]);
+ MULADD(at[2], at[25]);
+ MULADD(at[3], at[24]);
+ MULADD(at[4], at[23]);
+ MULADD(at[5], at[22]);
+ MULADD(at[6], at[21]);
+ MULADD(at[7], at[20]);
+ MULADD(at[8], at[19]);
+ MULADD(at[9], at[18]);
+ MULADD(at[10], at[17]);
+ MULADD(at[11], at[16]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[28]);
+ MULADD(at[1], at[27]);
+ MULADD(at[2], at[26]);
+ MULADD(at[3], at[25]);
+ MULADD(at[4], at[24]);
+ MULADD(at[5], at[23]);
+ MULADD(at[6], at[22]);
+ MULADD(at[7], at[21]);
+ MULADD(at[8], at[20]);
+ MULADD(at[9], at[19]);
+ MULADD(at[10], at[18]);
+ MULADD(at[11], at[17]);
+ MULADD(at[12], at[16]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[29]);
+ MULADD(at[1], at[28]);
+ MULADD(at[2], at[27]);
+ MULADD(at[3], at[26]);
+ MULADD(at[4], at[25]);
+ MULADD(at[5], at[24]);
+ MULADD(at[6], at[23]);
+ MULADD(at[7], at[22]);
+ MULADD(at[8], at[21]);
+ MULADD(at[9], at[20]);
+ MULADD(at[10], at[19]);
+ MULADD(at[11], at[18]);
+ MULADD(at[12], at[17]);
+ MULADD(at[13], at[16]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[30]);
+ MULADD(at[1], at[29]);
+ MULADD(at[2], at[28]);
+ MULADD(at[3], at[27]);
+ MULADD(at[4], at[26]);
+ MULADD(at[5], at[25]);
+ MULADD(at[6], at[24]);
+ MULADD(at[7], at[23]);
+ MULADD(at[8], at[22]);
+ MULADD(at[9], at[21]);
+ MULADD(at[10], at[20]);
+ MULADD(at[11], at[19]);
+ MULADD(at[12], at[18]);
+ MULADD(at[13], at[17]);
+ MULADD(at[14], at[16]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[31]);
+ MULADD(at[1], at[30]);
+ MULADD(at[2], at[29]);
+ MULADD(at[3], at[28]);
+ MULADD(at[4], at[27]);
+ MULADD(at[5], at[26]);
+ MULADD(at[6], at[25]);
+ MULADD(at[7], at[24]);
+ MULADD(at[8], at[23]);
+ MULADD(at[9], at[22]);
+ MULADD(at[10], at[21]);
+ MULADD(at[11], at[20]);
+ MULADD(at[12], at[19]);
+ MULADD(at[13], at[18]);
+ MULADD(at[14], at[17]);
+ MULADD(at[15], at[16]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[31]);
+ MULADD(at[2], at[30]);
+ MULADD(at[3], at[29]);
+ MULADD(at[4], at[28]);
+ MULADD(at[5], at[27]);
+ MULADD(at[6], at[26]);
+ MULADD(at[7], at[25]);
+ MULADD(at[8], at[24]);
+ MULADD(at[9], at[23]);
+ MULADD(at[10], at[22]);
+ MULADD(at[11], at[21]);
+ MULADD(at[12], at[20]);
+ MULADD(at[13], at[19]);
+ MULADD(at[14], at[18]);
+ MULADD(at[15], at[17]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[31]);
+ MULADD(at[3], at[30]);
+ MULADD(at[4], at[29]);
+ MULADD(at[5], at[28]);
+ MULADD(at[6], at[27]);
+ MULADD(at[7], at[26]);
+ MULADD(at[8], at[25]);
+ MULADD(at[9], at[24]);
+ MULADD(at[10], at[23]);
+ MULADD(at[11], at[22]);
+ MULADD(at[12], at[21]);
+ MULADD(at[13], at[20]);
+ MULADD(at[14], at[19]);
+ MULADD(at[15], at[18]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[31]);
+ MULADD(at[4], at[30]);
+ MULADD(at[5], at[29]);
+ MULADD(at[6], at[28]);
+ MULADD(at[7], at[27]);
+ MULADD(at[8], at[26]);
+ MULADD(at[9], at[25]);
+ MULADD(at[10], at[24]);
+ MULADD(at[11], at[23]);
+ MULADD(at[12], at[22]);
+ MULADD(at[13], at[21]);
+ MULADD(at[14], at[20]);
+ MULADD(at[15], at[19]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[31]);
+ MULADD(at[5], at[30]);
+ MULADD(at[6], at[29]);
+ MULADD(at[7], at[28]);
+ MULADD(at[8], at[27]);
+ MULADD(at[9], at[26]);
+ MULADD(at[10], at[25]);
+ MULADD(at[11], at[24]);
+ MULADD(at[12], at[23]);
+ MULADD(at[13], at[22]);
+ MULADD(at[14], at[21]);
+ MULADD(at[15], at[20]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[31]);
+ MULADD(at[6], at[30]);
+ MULADD(at[7], at[29]);
+ MULADD(at[8], at[28]);
+ MULADD(at[9], at[27]);
+ MULADD(at[10], at[26]);
+ MULADD(at[11], at[25]);
+ MULADD(at[12], at[24]);
+ MULADD(at[13], at[23]);
+ MULADD(at[14], at[22]);
+ MULADD(at[15], at[21]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[31]);
+ MULADD(at[7], at[30]);
+ MULADD(at[8], at[29]);
+ MULADD(at[9], at[28]);
+ MULADD(at[10], at[27]);
+ MULADD(at[11], at[26]);
+ MULADD(at[12], at[25]);
+ MULADD(at[13], at[24]);
+ MULADD(at[14], at[23]);
+ MULADD(at[15], at[22]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[31]);
+ MULADD(at[8], at[30]);
+ MULADD(at[9], at[29]);
+ MULADD(at[10], at[28]);
+ MULADD(at[11], at[27]);
+ MULADD(at[12], at[26]);
+ MULADD(at[13], at[25]);
+ MULADD(at[14], at[24]);
+ MULADD(at[15], at[23]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[31]);
+ MULADD(at[9], at[30]);
+ MULADD(at[10], at[29]);
+ MULADD(at[11], at[28]);
+ MULADD(at[12], at[27]);
+ MULADD(at[13], at[26]);
+ MULADD(at[14], at[25]);
+ MULADD(at[15], at[24]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[31]);
+ MULADD(at[10], at[30]);
+ MULADD(at[11], at[29]);
+ MULADD(at[12], at[28]);
+ MULADD(at[13], at[27]);
+ MULADD(at[14], at[26]);
+ MULADD(at[15], at[25]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[31]);
+ MULADD(at[11], at[30]);
+ MULADD(at[12], at[29]);
+ MULADD(at[13], at[28]);
+ MULADD(at[14], at[27]);
+ MULADD(at[15], at[26]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[31]);
+ MULADD(at[12], at[30]);
+ MULADD(at[13], at[29]);
+ MULADD(at[14], at[28]);
+ MULADD(at[15], at[27]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[31]);
+ MULADD(at[13], at[30]);
+ MULADD(at[14], at[29]);
+ MULADD(at[15], at[28]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[31]);
+ MULADD(at[14], at[30]);
+ MULADD(at[15], at[29]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[31]);
+ MULADD(at[15], at[30]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[31]);
+ COMBA_STORE(C->dp[30]);
+ COMBA_STORE2(C->dp[31]);
+ C->used = 32;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[64];
+
+ memcpy(at, A->dp, 32 * sizeof(mp_digit));
+ memcpy(at + 32, B->dp, 32 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[32]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[33]);
+ MULADD(at[1], at[32]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[34]);
+ MULADD(at[1], at[33]);
+ MULADD(at[2], at[32]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[35]);
+ MULADD(at[1], at[34]);
+ MULADD(at[2], at[33]);
+ MULADD(at[3], at[32]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[36]);
+ MULADD(at[1], at[35]);
+ MULADD(at[2], at[34]);
+ MULADD(at[3], at[33]);
+ MULADD(at[4], at[32]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[37]);
+ MULADD(at[1], at[36]);
+ MULADD(at[2], at[35]);
+ MULADD(at[3], at[34]);
+ MULADD(at[4], at[33]);
+ MULADD(at[5], at[32]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[38]);
+ MULADD(at[1], at[37]);
+ MULADD(at[2], at[36]);
+ MULADD(at[3], at[35]);
+ MULADD(at[4], at[34]);
+ MULADD(at[5], at[33]);
+ MULADD(at[6], at[32]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[39]);
+ MULADD(at[1], at[38]);
+ MULADD(at[2], at[37]);
+ MULADD(at[3], at[36]);
+ MULADD(at[4], at[35]);
+ MULADD(at[5], at[34]);
+ MULADD(at[6], at[33]);
+ MULADD(at[7], at[32]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[40]);
+ MULADD(at[1], at[39]);
+ MULADD(at[2], at[38]);
+ MULADD(at[3], at[37]);
+ MULADD(at[4], at[36]);
+ MULADD(at[5], at[35]);
+ MULADD(at[6], at[34]);
+ MULADD(at[7], at[33]);
+ MULADD(at[8], at[32]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[41]);
+ MULADD(at[1], at[40]);
+ MULADD(at[2], at[39]);
+ MULADD(at[3], at[38]);
+ MULADD(at[4], at[37]);
+ MULADD(at[5], at[36]);
+ MULADD(at[6], at[35]);
+ MULADD(at[7], at[34]);
+ MULADD(at[8], at[33]);
+ MULADD(at[9], at[32]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[42]);
+ MULADD(at[1], at[41]);
+ MULADD(at[2], at[40]);
+ MULADD(at[3], at[39]);
+ MULADD(at[4], at[38]);
+ MULADD(at[5], at[37]);
+ MULADD(at[6], at[36]);
+ MULADD(at[7], at[35]);
+ MULADD(at[8], at[34]);
+ MULADD(at[9], at[33]);
+ MULADD(at[10], at[32]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[43]);
+ MULADD(at[1], at[42]);
+ MULADD(at[2], at[41]);
+ MULADD(at[3], at[40]);
+ MULADD(at[4], at[39]);
+ MULADD(at[5], at[38]);
+ MULADD(at[6], at[37]);
+ MULADD(at[7], at[36]);
+ MULADD(at[8], at[35]);
+ MULADD(at[9], at[34]);
+ MULADD(at[10], at[33]);
+ MULADD(at[11], at[32]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[44]);
+ MULADD(at[1], at[43]);
+ MULADD(at[2], at[42]);
+ MULADD(at[3], at[41]);
+ MULADD(at[4], at[40]);
+ MULADD(at[5], at[39]);
+ MULADD(at[6], at[38]);
+ MULADD(at[7], at[37]);
+ MULADD(at[8], at[36]);
+ MULADD(at[9], at[35]);
+ MULADD(at[10], at[34]);
+ MULADD(at[11], at[33]);
+ MULADD(at[12], at[32]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[45]);
+ MULADD(at[1], at[44]);
+ MULADD(at[2], at[43]);
+ MULADD(at[3], at[42]);
+ MULADD(at[4], at[41]);
+ MULADD(at[5], at[40]);
+ MULADD(at[6], at[39]);
+ MULADD(at[7], at[38]);
+ MULADD(at[8], at[37]);
+ MULADD(at[9], at[36]);
+ MULADD(at[10], at[35]);
+ MULADD(at[11], at[34]);
+ MULADD(at[12], at[33]);
+ MULADD(at[13], at[32]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[46]);
+ MULADD(at[1], at[45]);
+ MULADD(at[2], at[44]);
+ MULADD(at[3], at[43]);
+ MULADD(at[4], at[42]);
+ MULADD(at[5], at[41]);
+ MULADD(at[6], at[40]);
+ MULADD(at[7], at[39]);
+ MULADD(at[8], at[38]);
+ MULADD(at[9], at[37]);
+ MULADD(at[10], at[36]);
+ MULADD(at[11], at[35]);
+ MULADD(at[12], at[34]);
+ MULADD(at[13], at[33]);
+ MULADD(at[14], at[32]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[47]);
+ MULADD(at[1], at[46]);
+ MULADD(at[2], at[45]);
+ MULADD(at[3], at[44]);
+ MULADD(at[4], at[43]);
+ MULADD(at[5], at[42]);
+ MULADD(at[6], at[41]);
+ MULADD(at[7], at[40]);
+ MULADD(at[8], at[39]);
+ MULADD(at[9], at[38]);
+ MULADD(at[10], at[37]);
+ MULADD(at[11], at[36]);
+ MULADD(at[12], at[35]);
+ MULADD(at[13], at[34]);
+ MULADD(at[14], at[33]);
+ MULADD(at[15], at[32]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[48]);
+ MULADD(at[1], at[47]);
+ MULADD(at[2], at[46]);
+ MULADD(at[3], at[45]);
+ MULADD(at[4], at[44]);
+ MULADD(at[5], at[43]);
+ MULADD(at[6], at[42]);
+ MULADD(at[7], at[41]);
+ MULADD(at[8], at[40]);
+ MULADD(at[9], at[39]);
+ MULADD(at[10], at[38]);
+ MULADD(at[11], at[37]);
+ MULADD(at[12], at[36]);
+ MULADD(at[13], at[35]);
+ MULADD(at[14], at[34]);
+ MULADD(at[15], at[33]);
+ MULADD(at[16], at[32]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[49]);
+ MULADD(at[1], at[48]);
+ MULADD(at[2], at[47]);
+ MULADD(at[3], at[46]);
+ MULADD(at[4], at[45]);
+ MULADD(at[5], at[44]);
+ MULADD(at[6], at[43]);
+ MULADD(at[7], at[42]);
+ MULADD(at[8], at[41]);
+ MULADD(at[9], at[40]);
+ MULADD(at[10], at[39]);
+ MULADD(at[11], at[38]);
+ MULADD(at[12], at[37]);
+ MULADD(at[13], at[36]);
+ MULADD(at[14], at[35]);
+ MULADD(at[15], at[34]);
+ MULADD(at[16], at[33]);
+ MULADD(at[17], at[32]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[50]);
+ MULADD(at[1], at[49]);
+ MULADD(at[2], at[48]);
+ MULADD(at[3], at[47]);
+ MULADD(at[4], at[46]);
+ MULADD(at[5], at[45]);
+ MULADD(at[6], at[44]);
+ MULADD(at[7], at[43]);
+ MULADD(at[8], at[42]);
+ MULADD(at[9], at[41]);
+ MULADD(at[10], at[40]);
+ MULADD(at[11], at[39]);
+ MULADD(at[12], at[38]);
+ MULADD(at[13], at[37]);
+ MULADD(at[14], at[36]);
+ MULADD(at[15], at[35]);
+ MULADD(at[16], at[34]);
+ MULADD(at[17], at[33]);
+ MULADD(at[18], at[32]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[51]);
+ MULADD(at[1], at[50]);
+ MULADD(at[2], at[49]);
+ MULADD(at[3], at[48]);
+ MULADD(at[4], at[47]);
+ MULADD(at[5], at[46]);
+ MULADD(at[6], at[45]);
+ MULADD(at[7], at[44]);
+ MULADD(at[8], at[43]);
+ MULADD(at[9], at[42]);
+ MULADD(at[10], at[41]);
+ MULADD(at[11], at[40]);
+ MULADD(at[12], at[39]);
+ MULADD(at[13], at[38]);
+ MULADD(at[14], at[37]);
+ MULADD(at[15], at[36]);
+ MULADD(at[16], at[35]);
+ MULADD(at[17], at[34]);
+ MULADD(at[18], at[33]);
+ MULADD(at[19], at[32]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[52]);
+ MULADD(at[1], at[51]);
+ MULADD(at[2], at[50]);
+ MULADD(at[3], at[49]);
+ MULADD(at[4], at[48]);
+ MULADD(at[5], at[47]);
+ MULADD(at[6], at[46]);
+ MULADD(at[7], at[45]);
+ MULADD(at[8], at[44]);
+ MULADD(at[9], at[43]);
+ MULADD(at[10], at[42]);
+ MULADD(at[11], at[41]);
+ MULADD(at[12], at[40]);
+ MULADD(at[13], at[39]);
+ MULADD(at[14], at[38]);
+ MULADD(at[15], at[37]);
+ MULADD(at[16], at[36]);
+ MULADD(at[17], at[35]);
+ MULADD(at[18], at[34]);
+ MULADD(at[19], at[33]);
+ MULADD(at[20], at[32]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[53]);
+ MULADD(at[1], at[52]);
+ MULADD(at[2], at[51]);
+ MULADD(at[3], at[50]);
+ MULADD(at[4], at[49]);
+ MULADD(at[5], at[48]);
+ MULADD(at[6], at[47]);
+ MULADD(at[7], at[46]);
+ MULADD(at[8], at[45]);
+ MULADD(at[9], at[44]);
+ MULADD(at[10], at[43]);
+ MULADD(at[11], at[42]);
+ MULADD(at[12], at[41]);
+ MULADD(at[13], at[40]);
+ MULADD(at[14], at[39]);
+ MULADD(at[15], at[38]);
+ MULADD(at[16], at[37]);
+ MULADD(at[17], at[36]);
+ MULADD(at[18], at[35]);
+ MULADD(at[19], at[34]);
+ MULADD(at[20], at[33]);
+ MULADD(at[21], at[32]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[54]);
+ MULADD(at[1], at[53]);
+ MULADD(at[2], at[52]);
+ MULADD(at[3], at[51]);
+ MULADD(at[4], at[50]);
+ MULADD(at[5], at[49]);
+ MULADD(at[6], at[48]);
+ MULADD(at[7], at[47]);
+ MULADD(at[8], at[46]);
+ MULADD(at[9], at[45]);
+ MULADD(at[10], at[44]);
+ MULADD(at[11], at[43]);
+ MULADD(at[12], at[42]);
+ MULADD(at[13], at[41]);
+ MULADD(at[14], at[40]);
+ MULADD(at[15], at[39]);
+ MULADD(at[16], at[38]);
+ MULADD(at[17], at[37]);
+ MULADD(at[18], at[36]);
+ MULADD(at[19], at[35]);
+ MULADD(at[20], at[34]);
+ MULADD(at[21], at[33]);
+ MULADD(at[22], at[32]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[55]);
+ MULADD(at[1], at[54]);
+ MULADD(at[2], at[53]);
+ MULADD(at[3], at[52]);
+ MULADD(at[4], at[51]);
+ MULADD(at[5], at[50]);
+ MULADD(at[6], at[49]);
+ MULADD(at[7], at[48]);
+ MULADD(at[8], at[47]);
+ MULADD(at[9], at[46]);
+ MULADD(at[10], at[45]);
+ MULADD(at[11], at[44]);
+ MULADD(at[12], at[43]);
+ MULADD(at[13], at[42]);
+ MULADD(at[14], at[41]);
+ MULADD(at[15], at[40]);
+ MULADD(at[16], at[39]);
+ MULADD(at[17], at[38]);
+ MULADD(at[18], at[37]);
+ MULADD(at[19], at[36]);
+ MULADD(at[20], at[35]);
+ MULADD(at[21], at[34]);
+ MULADD(at[22], at[33]);
+ MULADD(at[23], at[32]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[56]);
+ MULADD(at[1], at[55]);
+ MULADD(at[2], at[54]);
+ MULADD(at[3], at[53]);
+ MULADD(at[4], at[52]);
+ MULADD(at[5], at[51]);
+ MULADD(at[6], at[50]);
+ MULADD(at[7], at[49]);
+ MULADD(at[8], at[48]);
+ MULADD(at[9], at[47]);
+ MULADD(at[10], at[46]);
+ MULADD(at[11], at[45]);
+ MULADD(at[12], at[44]);
+ MULADD(at[13], at[43]);
+ MULADD(at[14], at[42]);
+ MULADD(at[15], at[41]);
+ MULADD(at[16], at[40]);
+ MULADD(at[17], at[39]);
+ MULADD(at[18], at[38]);
+ MULADD(at[19], at[37]);
+ MULADD(at[20], at[36]);
+ MULADD(at[21], at[35]);
+ MULADD(at[22], at[34]);
+ MULADD(at[23], at[33]);
+ MULADD(at[24], at[32]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[57]);
+ MULADD(at[1], at[56]);
+ MULADD(at[2], at[55]);
+ MULADD(at[3], at[54]);
+ MULADD(at[4], at[53]);
+ MULADD(at[5], at[52]);
+ MULADD(at[6], at[51]);
+ MULADD(at[7], at[50]);
+ MULADD(at[8], at[49]);
+ MULADD(at[9], at[48]);
+ MULADD(at[10], at[47]);
+ MULADD(at[11], at[46]);
+ MULADD(at[12], at[45]);
+ MULADD(at[13], at[44]);
+ MULADD(at[14], at[43]);
+ MULADD(at[15], at[42]);
+ MULADD(at[16], at[41]);
+ MULADD(at[17], at[40]);
+ MULADD(at[18], at[39]);
+ MULADD(at[19], at[38]);
+ MULADD(at[20], at[37]);
+ MULADD(at[21], at[36]);
+ MULADD(at[22], at[35]);
+ MULADD(at[23], at[34]);
+ MULADD(at[24], at[33]);
+ MULADD(at[25], at[32]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[58]);
+ MULADD(at[1], at[57]);
+ MULADD(at[2], at[56]);
+ MULADD(at[3], at[55]);
+ MULADD(at[4], at[54]);
+ MULADD(at[5], at[53]);
+ MULADD(at[6], at[52]);
+ MULADD(at[7], at[51]);
+ MULADD(at[8], at[50]);
+ MULADD(at[9], at[49]);
+ MULADD(at[10], at[48]);
+ MULADD(at[11], at[47]);
+ MULADD(at[12], at[46]);
+ MULADD(at[13], at[45]);
+ MULADD(at[14], at[44]);
+ MULADD(at[15], at[43]);
+ MULADD(at[16], at[42]);
+ MULADD(at[17], at[41]);
+ MULADD(at[18], at[40]);
+ MULADD(at[19], at[39]);
+ MULADD(at[20], at[38]);
+ MULADD(at[21], at[37]);
+ MULADD(at[22], at[36]);
+ MULADD(at[23], at[35]);
+ MULADD(at[24], at[34]);
+ MULADD(at[25], at[33]);
+ MULADD(at[26], at[32]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[59]);
+ MULADD(at[1], at[58]);
+ MULADD(at[2], at[57]);
+ MULADD(at[3], at[56]);
+ MULADD(at[4], at[55]);
+ MULADD(at[5], at[54]);
+ MULADD(at[6], at[53]);
+ MULADD(at[7], at[52]);
+ MULADD(at[8], at[51]);
+ MULADD(at[9], at[50]);
+ MULADD(at[10], at[49]);
+ MULADD(at[11], at[48]);
+ MULADD(at[12], at[47]);
+ MULADD(at[13], at[46]);
+ MULADD(at[14], at[45]);
+ MULADD(at[15], at[44]);
+ MULADD(at[16], at[43]);
+ MULADD(at[17], at[42]);
+ MULADD(at[18], at[41]);
+ MULADD(at[19], at[40]);
+ MULADD(at[20], at[39]);
+ MULADD(at[21], at[38]);
+ MULADD(at[22], at[37]);
+ MULADD(at[23], at[36]);
+ MULADD(at[24], at[35]);
+ MULADD(at[25], at[34]);
+ MULADD(at[26], at[33]);
+ MULADD(at[27], at[32]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[60]);
+ MULADD(at[1], at[59]);
+ MULADD(at[2], at[58]);
+ MULADD(at[3], at[57]);
+ MULADD(at[4], at[56]);
+ MULADD(at[5], at[55]);
+ MULADD(at[6], at[54]);
+ MULADD(at[7], at[53]);
+ MULADD(at[8], at[52]);
+ MULADD(at[9], at[51]);
+ MULADD(at[10], at[50]);
+ MULADD(at[11], at[49]);
+ MULADD(at[12], at[48]);
+ MULADD(at[13], at[47]);
+ MULADD(at[14], at[46]);
+ MULADD(at[15], at[45]);
+ MULADD(at[16], at[44]);
+ MULADD(at[17], at[43]);
+ MULADD(at[18], at[42]);
+ MULADD(at[19], at[41]);
+ MULADD(at[20], at[40]);
+ MULADD(at[21], at[39]);
+ MULADD(at[22], at[38]);
+ MULADD(at[23], at[37]);
+ MULADD(at[24], at[36]);
+ MULADD(at[25], at[35]);
+ MULADD(at[26], at[34]);
+ MULADD(at[27], at[33]);
+ MULADD(at[28], at[32]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[61]);
+ MULADD(at[1], at[60]);
+ MULADD(at[2], at[59]);
+ MULADD(at[3], at[58]);
+ MULADD(at[4], at[57]);
+ MULADD(at[5], at[56]);
+ MULADD(at[6], at[55]);
+ MULADD(at[7], at[54]);
+ MULADD(at[8], at[53]);
+ MULADD(at[9], at[52]);
+ MULADD(at[10], at[51]);
+ MULADD(at[11], at[50]);
+ MULADD(at[12], at[49]);
+ MULADD(at[13], at[48]);
+ MULADD(at[14], at[47]);
+ MULADD(at[15], at[46]);
+ MULADD(at[16], at[45]);
+ MULADD(at[17], at[44]);
+ MULADD(at[18], at[43]);
+ MULADD(at[19], at[42]);
+ MULADD(at[20], at[41]);
+ MULADD(at[21], at[40]);
+ MULADD(at[22], at[39]);
+ MULADD(at[23], at[38]);
+ MULADD(at[24], at[37]);
+ MULADD(at[25], at[36]);
+ MULADD(at[26], at[35]);
+ MULADD(at[27], at[34]);
+ MULADD(at[28], at[33]);
+ MULADD(at[29], at[32]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[62]);
+ MULADD(at[1], at[61]);
+ MULADD(at[2], at[60]);
+ MULADD(at[3], at[59]);
+ MULADD(at[4], at[58]);
+ MULADD(at[5], at[57]);
+ MULADD(at[6], at[56]);
+ MULADD(at[7], at[55]);
+ MULADD(at[8], at[54]);
+ MULADD(at[9], at[53]);
+ MULADD(at[10], at[52]);
+ MULADD(at[11], at[51]);
+ MULADD(at[12], at[50]);
+ MULADD(at[13], at[49]);
+ MULADD(at[14], at[48]);
+ MULADD(at[15], at[47]);
+ MULADD(at[16], at[46]);
+ MULADD(at[17], at[45]);
+ MULADD(at[18], at[44]);
+ MULADD(at[19], at[43]);
+ MULADD(at[20], at[42]);
+ MULADD(at[21], at[41]);
+ MULADD(at[22], at[40]);
+ MULADD(at[23], at[39]);
+ MULADD(at[24], at[38]);
+ MULADD(at[25], at[37]);
+ MULADD(at[26], at[36]);
+ MULADD(at[27], at[35]);
+ MULADD(at[28], at[34]);
+ MULADD(at[29], at[33]);
+ MULADD(at[30], at[32]);
+ COMBA_STORE(C->dp[30]);
+ /* 31 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[63]);
+ MULADD(at[1], at[62]);
+ MULADD(at[2], at[61]);
+ MULADD(at[3], at[60]);
+ MULADD(at[4], at[59]);
+ MULADD(at[5], at[58]);
+ MULADD(at[6], at[57]);
+ MULADD(at[7], at[56]);
+ MULADD(at[8], at[55]);
+ MULADD(at[9], at[54]);
+ MULADD(at[10], at[53]);
+ MULADD(at[11], at[52]);
+ MULADD(at[12], at[51]);
+ MULADD(at[13], at[50]);
+ MULADD(at[14], at[49]);
+ MULADD(at[15], at[48]);
+ MULADD(at[16], at[47]);
+ MULADD(at[17], at[46]);
+ MULADD(at[18], at[45]);
+ MULADD(at[19], at[44]);
+ MULADD(at[20], at[43]);
+ MULADD(at[21], at[42]);
+ MULADD(at[22], at[41]);
+ MULADD(at[23], at[40]);
+ MULADD(at[24], at[39]);
+ MULADD(at[25], at[38]);
+ MULADD(at[26], at[37]);
+ MULADD(at[27], at[36]);
+ MULADD(at[28], at[35]);
+ MULADD(at[29], at[34]);
+ MULADD(at[30], at[33]);
+ MULADD(at[31], at[32]);
+ COMBA_STORE(C->dp[31]);
+ /* 32 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[63]);
+ MULADD(at[2], at[62]);
+ MULADD(at[3], at[61]);
+ MULADD(at[4], at[60]);
+ MULADD(at[5], at[59]);
+ MULADD(at[6], at[58]);
+ MULADD(at[7], at[57]);
+ MULADD(at[8], at[56]);
+ MULADD(at[9], at[55]);
+ MULADD(at[10], at[54]);
+ MULADD(at[11], at[53]);
+ MULADD(at[12], at[52]);
+ MULADD(at[13], at[51]);
+ MULADD(at[14], at[50]);
+ MULADD(at[15], at[49]);
+ MULADD(at[16], at[48]);
+ MULADD(at[17], at[47]);
+ MULADD(at[18], at[46]);
+ MULADD(at[19], at[45]);
+ MULADD(at[20], at[44]);
+ MULADD(at[21], at[43]);
+ MULADD(at[22], at[42]);
+ MULADD(at[23], at[41]);
+ MULADD(at[24], at[40]);
+ MULADD(at[25], at[39]);
+ MULADD(at[26], at[38]);
+ MULADD(at[27], at[37]);
+ MULADD(at[28], at[36]);
+ MULADD(at[29], at[35]);
+ MULADD(at[30], at[34]);
+ MULADD(at[31], at[33]);
+ COMBA_STORE(C->dp[32]);
+ /* 33 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[63]);
+ MULADD(at[3], at[62]);
+ MULADD(at[4], at[61]);
+ MULADD(at[5], at[60]);
+ MULADD(at[6], at[59]);
+ MULADD(at[7], at[58]);
+ MULADD(at[8], at[57]);
+ MULADD(at[9], at[56]);
+ MULADD(at[10], at[55]);
+ MULADD(at[11], at[54]);
+ MULADD(at[12], at[53]);
+ MULADD(at[13], at[52]);
+ MULADD(at[14], at[51]);
+ MULADD(at[15], at[50]);
+ MULADD(at[16], at[49]);
+ MULADD(at[17], at[48]);
+ MULADD(at[18], at[47]);
+ MULADD(at[19], at[46]);
+ MULADD(at[20], at[45]);
+ MULADD(at[21], at[44]);
+ MULADD(at[22], at[43]);
+ MULADD(at[23], at[42]);
+ MULADD(at[24], at[41]);
+ MULADD(at[25], at[40]);
+ MULADD(at[26], at[39]);
+ MULADD(at[27], at[38]);
+ MULADD(at[28], at[37]);
+ MULADD(at[29], at[36]);
+ MULADD(at[30], at[35]);
+ MULADD(at[31], at[34]);
+ COMBA_STORE(C->dp[33]);
+ /* 34 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[63]);
+ MULADD(at[4], at[62]);
+ MULADD(at[5], at[61]);
+ MULADD(at[6], at[60]);
+ MULADD(at[7], at[59]);
+ MULADD(at[8], at[58]);
+ MULADD(at[9], at[57]);
+ MULADD(at[10], at[56]);
+ MULADD(at[11], at[55]);
+ MULADD(at[12], at[54]);
+ MULADD(at[13], at[53]);
+ MULADD(at[14], at[52]);
+ MULADD(at[15], at[51]);
+ MULADD(at[16], at[50]);
+ MULADD(at[17], at[49]);
+ MULADD(at[18], at[48]);
+ MULADD(at[19], at[47]);
+ MULADD(at[20], at[46]);
+ MULADD(at[21], at[45]);
+ MULADD(at[22], at[44]);
+ MULADD(at[23], at[43]);
+ MULADD(at[24], at[42]);
+ MULADD(at[25], at[41]);
+ MULADD(at[26], at[40]);
+ MULADD(at[27], at[39]);
+ MULADD(at[28], at[38]);
+ MULADD(at[29], at[37]);
+ MULADD(at[30], at[36]);
+ MULADD(at[31], at[35]);
+ COMBA_STORE(C->dp[34]);
+ /* 35 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[63]);
+ MULADD(at[5], at[62]);
+ MULADD(at[6], at[61]);
+ MULADD(at[7], at[60]);
+ MULADD(at[8], at[59]);
+ MULADD(at[9], at[58]);
+ MULADD(at[10], at[57]);
+ MULADD(at[11], at[56]);
+ MULADD(at[12], at[55]);
+ MULADD(at[13], at[54]);
+ MULADD(at[14], at[53]);
+ MULADD(at[15], at[52]);
+ MULADD(at[16], at[51]);
+ MULADD(at[17], at[50]);
+ MULADD(at[18], at[49]);
+ MULADD(at[19], at[48]);
+ MULADD(at[20], at[47]);
+ MULADD(at[21], at[46]);
+ MULADD(at[22], at[45]);
+ MULADD(at[23], at[44]);
+ MULADD(at[24], at[43]);
+ MULADD(at[25], at[42]);
+ MULADD(at[26], at[41]);
+ MULADD(at[27], at[40]);
+ MULADD(at[28], at[39]);
+ MULADD(at[29], at[38]);
+ MULADD(at[30], at[37]);
+ MULADD(at[31], at[36]);
+ COMBA_STORE(C->dp[35]);
+ /* 36 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[63]);
+ MULADD(at[6], at[62]);
+ MULADD(at[7], at[61]);
+ MULADD(at[8], at[60]);
+ MULADD(at[9], at[59]);
+ MULADD(at[10], at[58]);
+ MULADD(at[11], at[57]);
+ MULADD(at[12], at[56]);
+ MULADD(at[13], at[55]);
+ MULADD(at[14], at[54]);
+ MULADD(at[15], at[53]);
+ MULADD(at[16], at[52]);
+ MULADD(at[17], at[51]);
+ MULADD(at[18], at[50]);
+ MULADD(at[19], at[49]);
+ MULADD(at[20], at[48]);
+ MULADD(at[21], at[47]);
+ MULADD(at[22], at[46]);
+ MULADD(at[23], at[45]);
+ MULADD(at[24], at[44]);
+ MULADD(at[25], at[43]);
+ MULADD(at[26], at[42]);
+ MULADD(at[27], at[41]);
+ MULADD(at[28], at[40]);
+ MULADD(at[29], at[39]);
+ MULADD(at[30], at[38]);
+ MULADD(at[31], at[37]);
+ COMBA_STORE(C->dp[36]);
+ /* 37 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[63]);
+ MULADD(at[7], at[62]);
+ MULADD(at[8], at[61]);
+ MULADD(at[9], at[60]);
+ MULADD(at[10], at[59]);
+ MULADD(at[11], at[58]);
+ MULADD(at[12], at[57]);
+ MULADD(at[13], at[56]);
+ MULADD(at[14], at[55]);
+ MULADD(at[15], at[54]);
+ MULADD(at[16], at[53]);
+ MULADD(at[17], at[52]);
+ MULADD(at[18], at[51]);
+ MULADD(at[19], at[50]);
+ MULADD(at[20], at[49]);
+ MULADD(at[21], at[48]);
+ MULADD(at[22], at[47]);
+ MULADD(at[23], at[46]);
+ MULADD(at[24], at[45]);
+ MULADD(at[25], at[44]);
+ MULADD(at[26], at[43]);
+ MULADD(at[27], at[42]);
+ MULADD(at[28], at[41]);
+ MULADD(at[29], at[40]);
+ MULADD(at[30], at[39]);
+ MULADD(at[31], at[38]);
+ COMBA_STORE(C->dp[37]);
+ /* 38 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[63]);
+ MULADD(at[8], at[62]);
+ MULADD(at[9], at[61]);
+ MULADD(at[10], at[60]);
+ MULADD(at[11], at[59]);
+ MULADD(at[12], at[58]);
+ MULADD(at[13], at[57]);
+ MULADD(at[14], at[56]);
+ MULADD(at[15], at[55]);
+ MULADD(at[16], at[54]);
+ MULADD(at[17], at[53]);
+ MULADD(at[18], at[52]);
+ MULADD(at[19], at[51]);
+ MULADD(at[20], at[50]);
+ MULADD(at[21], at[49]);
+ MULADD(at[22], at[48]);
+ MULADD(at[23], at[47]);
+ MULADD(at[24], at[46]);
+ MULADD(at[25], at[45]);
+ MULADD(at[26], at[44]);
+ MULADD(at[27], at[43]);
+ MULADD(at[28], at[42]);
+ MULADD(at[29], at[41]);
+ MULADD(at[30], at[40]);
+ MULADD(at[31], at[39]);
+ COMBA_STORE(C->dp[38]);
+ /* 39 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[63]);
+ MULADD(at[9], at[62]);
+ MULADD(at[10], at[61]);
+ MULADD(at[11], at[60]);
+ MULADD(at[12], at[59]);
+ MULADD(at[13], at[58]);
+ MULADD(at[14], at[57]);
+ MULADD(at[15], at[56]);
+ MULADD(at[16], at[55]);
+ MULADD(at[17], at[54]);
+ MULADD(at[18], at[53]);
+ MULADD(at[19], at[52]);
+ MULADD(at[20], at[51]);
+ MULADD(at[21], at[50]);
+ MULADD(at[22], at[49]);
+ MULADD(at[23], at[48]);
+ MULADD(at[24], at[47]);
+ MULADD(at[25], at[46]);
+ MULADD(at[26], at[45]);
+ MULADD(at[27], at[44]);
+ MULADD(at[28], at[43]);
+ MULADD(at[29], at[42]);
+ MULADD(at[30], at[41]);
+ MULADD(at[31], at[40]);
+ COMBA_STORE(C->dp[39]);
+ /* 40 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[63]);
+ MULADD(at[10], at[62]);
+ MULADD(at[11], at[61]);
+ MULADD(at[12], at[60]);
+ MULADD(at[13], at[59]);
+ MULADD(at[14], at[58]);
+ MULADD(at[15], at[57]);
+ MULADD(at[16], at[56]);
+ MULADD(at[17], at[55]);
+ MULADD(at[18], at[54]);
+ MULADD(at[19], at[53]);
+ MULADD(at[20], at[52]);
+ MULADD(at[21], at[51]);
+ MULADD(at[22], at[50]);
+ MULADD(at[23], at[49]);
+ MULADD(at[24], at[48]);
+ MULADD(at[25], at[47]);
+ MULADD(at[26], at[46]);
+ MULADD(at[27], at[45]);
+ MULADD(at[28], at[44]);
+ MULADD(at[29], at[43]);
+ MULADD(at[30], at[42]);
+ MULADD(at[31], at[41]);
+ COMBA_STORE(C->dp[40]);
+ /* 41 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[63]);
+ MULADD(at[11], at[62]);
+ MULADD(at[12], at[61]);
+ MULADD(at[13], at[60]);
+ MULADD(at[14], at[59]);
+ MULADD(at[15], at[58]);
+ MULADD(at[16], at[57]);
+ MULADD(at[17], at[56]);
+ MULADD(at[18], at[55]);
+ MULADD(at[19], at[54]);
+ MULADD(at[20], at[53]);
+ MULADD(at[21], at[52]);
+ MULADD(at[22], at[51]);
+ MULADD(at[23], at[50]);
+ MULADD(at[24], at[49]);
+ MULADD(at[25], at[48]);
+ MULADD(at[26], at[47]);
+ MULADD(at[27], at[46]);
+ MULADD(at[28], at[45]);
+ MULADD(at[29], at[44]);
+ MULADD(at[30], at[43]);
+ MULADD(at[31], at[42]);
+ COMBA_STORE(C->dp[41]);
+ /* 42 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[63]);
+ MULADD(at[12], at[62]);
+ MULADD(at[13], at[61]);
+ MULADD(at[14], at[60]);
+ MULADD(at[15], at[59]);
+ MULADD(at[16], at[58]);
+ MULADD(at[17], at[57]);
+ MULADD(at[18], at[56]);
+ MULADD(at[19], at[55]);
+ MULADD(at[20], at[54]);
+ MULADD(at[21], at[53]);
+ MULADD(at[22], at[52]);
+ MULADD(at[23], at[51]);
+ MULADD(at[24], at[50]);
+ MULADD(at[25], at[49]);
+ MULADD(at[26], at[48]);
+ MULADD(at[27], at[47]);
+ MULADD(at[28], at[46]);
+ MULADD(at[29], at[45]);
+ MULADD(at[30], at[44]);
+ MULADD(at[31], at[43]);
+ COMBA_STORE(C->dp[42]);
+ /* 43 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[63]);
+ MULADD(at[13], at[62]);
+ MULADD(at[14], at[61]);
+ MULADD(at[15], at[60]);
+ MULADD(at[16], at[59]);
+ MULADD(at[17], at[58]);
+ MULADD(at[18], at[57]);
+ MULADD(at[19], at[56]);
+ MULADD(at[20], at[55]);
+ MULADD(at[21], at[54]);
+ MULADD(at[22], at[53]);
+ MULADD(at[23], at[52]);
+ MULADD(at[24], at[51]);
+ MULADD(at[25], at[50]);
+ MULADD(at[26], at[49]);
+ MULADD(at[27], at[48]);
+ MULADD(at[28], at[47]);
+ MULADD(at[29], at[46]);
+ MULADD(at[30], at[45]);
+ MULADD(at[31], at[44]);
+ COMBA_STORE(C->dp[43]);
+ /* 44 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[63]);
+ MULADD(at[14], at[62]);
+ MULADD(at[15], at[61]);
+ MULADD(at[16], at[60]);
+ MULADD(at[17], at[59]);
+ MULADD(at[18], at[58]);
+ MULADD(at[19], at[57]);
+ MULADD(at[20], at[56]);
+ MULADD(at[21], at[55]);
+ MULADD(at[22], at[54]);
+ MULADD(at[23], at[53]);
+ MULADD(at[24], at[52]);
+ MULADD(at[25], at[51]);
+ MULADD(at[26], at[50]);
+ MULADD(at[27], at[49]);
+ MULADD(at[28], at[48]);
+ MULADD(at[29], at[47]);
+ MULADD(at[30], at[46]);
+ MULADD(at[31], at[45]);
+ COMBA_STORE(C->dp[44]);
+ /* 45 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[63]);
+ MULADD(at[15], at[62]);
+ MULADD(at[16], at[61]);
+ MULADD(at[17], at[60]);
+ MULADD(at[18], at[59]);
+ MULADD(at[19], at[58]);
+ MULADD(at[20], at[57]);
+ MULADD(at[21], at[56]);
+ MULADD(at[22], at[55]);
+ MULADD(at[23], at[54]);
+ MULADD(at[24], at[53]);
+ MULADD(at[25], at[52]);
+ MULADD(at[26], at[51]);
+ MULADD(at[27], at[50]);
+ MULADD(at[28], at[49]);
+ MULADD(at[29], at[48]);
+ MULADD(at[30], at[47]);
+ MULADD(at[31], at[46]);
+ COMBA_STORE(C->dp[45]);
+ /* 46 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[63]);
+ MULADD(at[16], at[62]);
+ MULADD(at[17], at[61]);
+ MULADD(at[18], at[60]);
+ MULADD(at[19], at[59]);
+ MULADD(at[20], at[58]);
+ MULADD(at[21], at[57]);
+ MULADD(at[22], at[56]);
+ MULADD(at[23], at[55]);
+ MULADD(at[24], at[54]);
+ MULADD(at[25], at[53]);
+ MULADD(at[26], at[52]);
+ MULADD(at[27], at[51]);
+ MULADD(at[28], at[50]);
+ MULADD(at[29], at[49]);
+ MULADD(at[30], at[48]);
+ MULADD(at[31], at[47]);
+ COMBA_STORE(C->dp[46]);
+ /* 47 */
+ COMBA_FORWARD;
+ MULADD(at[16], at[63]);
+ MULADD(at[17], at[62]);
+ MULADD(at[18], at[61]);
+ MULADD(at[19], at[60]);
+ MULADD(at[20], at[59]);
+ MULADD(at[21], at[58]);
+ MULADD(at[22], at[57]);
+ MULADD(at[23], at[56]);
+ MULADD(at[24], at[55]);
+ MULADD(at[25], at[54]);
+ MULADD(at[26], at[53]);
+ MULADD(at[27], at[52]);
+ MULADD(at[28], at[51]);
+ MULADD(at[29], at[50]);
+ MULADD(at[30], at[49]);
+ MULADD(at[31], at[48]);
+ COMBA_STORE(C->dp[47]);
+ /* 48 */
+ COMBA_FORWARD;
+ MULADD(at[17], at[63]);
+ MULADD(at[18], at[62]);
+ MULADD(at[19], at[61]);
+ MULADD(at[20], at[60]);
+ MULADD(at[21], at[59]);
+ MULADD(at[22], at[58]);
+ MULADD(at[23], at[57]);
+ MULADD(at[24], at[56]);
+ MULADD(at[25], at[55]);
+ MULADD(at[26], at[54]);
+ MULADD(at[27], at[53]);
+ MULADD(at[28], at[52]);
+ MULADD(at[29], at[51]);
+ MULADD(at[30], at[50]);
+ MULADD(at[31], at[49]);
+ COMBA_STORE(C->dp[48]);
+ /* 49 */
+ COMBA_FORWARD;
+ MULADD(at[18], at[63]);
+ MULADD(at[19], at[62]);
+ MULADD(at[20], at[61]);
+ MULADD(at[21], at[60]);
+ MULADD(at[22], at[59]);
+ MULADD(at[23], at[58]);
+ MULADD(at[24], at[57]);
+ MULADD(at[25], at[56]);
+ MULADD(at[26], at[55]);
+ MULADD(at[27], at[54]);
+ MULADD(at[28], at[53]);
+ MULADD(at[29], at[52]);
+ MULADD(at[30], at[51]);
+ MULADD(at[31], at[50]);
+ COMBA_STORE(C->dp[49]);
+ /* 50 */
+ COMBA_FORWARD;
+ MULADD(at[19], at[63]);
+ MULADD(at[20], at[62]);
+ MULADD(at[21], at[61]);
+ MULADD(at[22], at[60]);
+ MULADD(at[23], at[59]);
+ MULADD(at[24], at[58]);
+ MULADD(at[25], at[57]);
+ MULADD(at[26], at[56]);
+ MULADD(at[27], at[55]);
+ MULADD(at[28], at[54]);
+ MULADD(at[29], at[53]);
+ MULADD(at[30], at[52]);
+ MULADD(at[31], at[51]);
+ COMBA_STORE(C->dp[50]);
+ /* 51 */
+ COMBA_FORWARD;
+ MULADD(at[20], at[63]);
+ MULADD(at[21], at[62]);
+ MULADD(at[22], at[61]);
+ MULADD(at[23], at[60]);
+ MULADD(at[24], at[59]);
+ MULADD(at[25], at[58]);
+ MULADD(at[26], at[57]);
+ MULADD(at[27], at[56]);
+ MULADD(at[28], at[55]);
+ MULADD(at[29], at[54]);
+ MULADD(at[30], at[53]);
+ MULADD(at[31], at[52]);
+ COMBA_STORE(C->dp[51]);
+ /* 52 */
+ COMBA_FORWARD;
+ MULADD(at[21], at[63]);
+ MULADD(at[22], at[62]);
+ MULADD(at[23], at[61]);
+ MULADD(at[24], at[60]);
+ MULADD(at[25], at[59]);
+ MULADD(at[26], at[58]);
+ MULADD(at[27], at[57]);
+ MULADD(at[28], at[56]);
+ MULADD(at[29], at[55]);
+ MULADD(at[30], at[54]);
+ MULADD(at[31], at[53]);
+ COMBA_STORE(C->dp[52]);
+ /* 53 */
+ COMBA_FORWARD;
+ MULADD(at[22], at[63]);
+ MULADD(at[23], at[62]);
+ MULADD(at[24], at[61]);
+ MULADD(at[25], at[60]);
+ MULADD(at[26], at[59]);
+ MULADD(at[27], at[58]);
+ MULADD(at[28], at[57]);
+ MULADD(at[29], at[56]);
+ MULADD(at[30], at[55]);
+ MULADD(at[31], at[54]);
+ COMBA_STORE(C->dp[53]);
+ /* 54 */
+ COMBA_FORWARD;
+ MULADD(at[23], at[63]);
+ MULADD(at[24], at[62]);
+ MULADD(at[25], at[61]);
+ MULADD(at[26], at[60]);
+ MULADD(at[27], at[59]);
+ MULADD(at[28], at[58]);
+ MULADD(at[29], at[57]);
+ MULADD(at[30], at[56]);
+ MULADD(at[31], at[55]);
+ COMBA_STORE(C->dp[54]);
+ /* 55 */
+ COMBA_FORWARD;
+ MULADD(at[24], at[63]);
+ MULADD(at[25], at[62]);
+ MULADD(at[26], at[61]);
+ MULADD(at[27], at[60]);
+ MULADD(at[28], at[59]);
+ MULADD(at[29], at[58]);
+ MULADD(at[30], at[57]);
+ MULADD(at[31], at[56]);
+ COMBA_STORE(C->dp[55]);
+ /* 56 */
+ COMBA_FORWARD;
+ MULADD(at[25], at[63]);
+ MULADD(at[26], at[62]);
+ MULADD(at[27], at[61]);
+ MULADD(at[28], at[60]);
+ MULADD(at[29], at[59]);
+ MULADD(at[30], at[58]);
+ MULADD(at[31], at[57]);
+ COMBA_STORE(C->dp[56]);
+ /* 57 */
+ COMBA_FORWARD;
+ MULADD(at[26], at[63]);
+ MULADD(at[27], at[62]);
+ MULADD(at[28], at[61]);
+ MULADD(at[29], at[60]);
+ MULADD(at[30], at[59]);
+ MULADD(at[31], at[58]);
+ COMBA_STORE(C->dp[57]);
+ /* 58 */
+ COMBA_FORWARD;
+ MULADD(at[27], at[63]);
+ MULADD(at[28], at[62]);
+ MULADD(at[29], at[61]);
+ MULADD(at[30], at[60]);
+ MULADD(at[31], at[59]);
+ COMBA_STORE(C->dp[58]);
+ /* 59 */
+ COMBA_FORWARD;
+ MULADD(at[28], at[63]);
+ MULADD(at[29], at[62]);
+ MULADD(at[30], at[61]);
+ MULADD(at[31], at[60]);
+ COMBA_STORE(C->dp[59]);
+ /* 60 */
+ COMBA_FORWARD;
+ MULADD(at[29], at[63]);
+ MULADD(at[30], at[62]);
+ MULADD(at[31], at[61]);
+ COMBA_STORE(C->dp[60]);
+ /* 61 */
+ COMBA_FORWARD;
+ MULADD(at[30], at[63]);
+ MULADD(at[31], at[62]);
+ COMBA_STORE(C->dp[61]);
+ /* 62 */
+ COMBA_FORWARD;
+ MULADD(at[31], at[63]);
+ COMBA_STORE(C->dp[62]);
+ COMBA_STORE2(C->dp[63]);
+ C->used = 64;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
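+/*
+ * Comba squaring.  Output digit k of B = A^2 is the column sum of all
+ * products a[i] * a[j] with i + j == k, accumulated through the
+ * three-digit carry chain (c0, c1, c2) driven by the macros defined
+ * earlier in this file.  Each off-diagonal product occurs for both
+ * (i, j) and (j, i), so SQRADD2 folds it in twice; diagonal squares
+ * are added once with SQRADD.
+ */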
+void
+s_mp_sqr_comba_4(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[8], c0, c1, c2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADD2(a[2], a[3]);
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+ COMBA_STORE2(b[7]);
+ COMBA_FINI;
+
+ B->used = 8;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 8 * sizeof(mp_digit));
+ mp_clamp(B);
+}
+
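+/*
+ * The wider squarings below handle long columns with the secondary
+ * accumulator (sc0, sc1, sc2): SQRADDSC starts a column of
+ * off-diagonal products, SQRADDAC adds each further product once, and
+ * SQRADDDB folds the partial sum into the main carry chain twice.
+ * Doubling the whole column in one step is cheaper than doubling
+ * every product individually as SQRADD2 does.
+ */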
+void
+s_mp_sqr_comba_8(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADD2(a[3], a[7]);
+ SQRADD2(a[4], a[6]);
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADD2(a[4], a[7]);
+ SQRADD2(a[5], a[6]);
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADD2(a[5], a[7]);
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADD2(a[6], a[7]);
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+ COMBA_STORE2(b[15]);
+ COMBA_FINI;
+
+ B->used = 16;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 16 * sizeof(mp_digit));
+ mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_16(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]);
+ SQRADDAC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]);
+ SQRADDAC(a[1], a[8]);
+ SQRADDAC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]);
+ SQRADDAC(a[1], a[9]);
+ SQRADDAC(a[2], a[8]);
+ SQRADDAC(a[3], a[7]);
+ SQRADDAC(a[4], a[6]);
+ SQRADDDB;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]);
+ SQRADDAC(a[1], a[10]);
+ SQRADDAC(a[2], a[9]);
+ SQRADDAC(a[3], a[8]);
+ SQRADDAC(a[4], a[7]);
+ SQRADDAC(a[5], a[6]);
+ SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]);
+ SQRADDAC(a[1], a[11]);
+ SQRADDAC(a[2], a[10]);
+ SQRADDAC(a[3], a[9]);
+ SQRADDAC(a[4], a[8]);
+ SQRADDAC(a[5], a[7]);
+ SQRADDDB;
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]);
+ SQRADDAC(a[1], a[12]);
+ SQRADDAC(a[2], a[11]);
+ SQRADDAC(a[3], a[10]);
+ SQRADDAC(a[4], a[9]);
+ SQRADDAC(a[5], a[8]);
+ SQRADDAC(a[6], a[7]);
+ SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]);
+ SQRADDAC(a[1], a[13]);
+ SQRADDAC(a[2], a[12]);
+ SQRADDAC(a[3], a[11]);
+ SQRADDAC(a[4], a[10]);
+ SQRADDAC(a[5], a[9]);
+ SQRADDAC(a[6], a[8]);
+ SQRADDDB;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]);
+ SQRADDAC(a[1], a[14]);
+ SQRADDAC(a[2], a[13]);
+ SQRADDAC(a[3], a[12]);
+ SQRADDAC(a[4], a[11]);
+ SQRADDAC(a[5], a[10]);
+ SQRADDAC(a[6], a[9]);
+ SQRADDAC(a[7], a[8]);
+ SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[15]);
+ SQRADDAC(a[2], a[14]);
+ SQRADDAC(a[3], a[13]);
+ SQRADDAC(a[4], a[12]);
+ SQRADDAC(a[5], a[11]);
+ SQRADDAC(a[6], a[10]);
+ SQRADDAC(a[7], a[9]);
+ SQRADDDB;
+ SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[15]);
+ SQRADDAC(a[3], a[14]);
+ SQRADDAC(a[4], a[13]);
+ SQRADDAC(a[5], a[12]);
+ SQRADDAC(a[6], a[11]);
+ SQRADDAC(a[7], a[10]);
+ SQRADDAC(a[8], a[9]);
+ SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[15]);
+ SQRADDAC(a[4], a[14]);
+ SQRADDAC(a[5], a[13]);
+ SQRADDAC(a[6], a[12]);
+ SQRADDAC(a[7], a[11]);
+ SQRADDAC(a[8], a[10]);
+ SQRADDDB;
+ SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[15]);
+ SQRADDAC(a[5], a[14]);
+ SQRADDAC(a[6], a[13]);
+ SQRADDAC(a[7], a[12]);
+ SQRADDAC(a[8], a[11]);
+ SQRADDAC(a[9], a[10]);
+ SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[15]);
+ SQRADDAC(a[6], a[14]);
+ SQRADDAC(a[7], a[13]);
+ SQRADDAC(a[8], a[12]);
+ SQRADDAC(a[9], a[11]);
+ SQRADDDB;
+ SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[15]);
+ SQRADDAC(a[7], a[14]);
+ SQRADDAC(a[8], a[13]);
+ SQRADDAC(a[9], a[12]);
+ SQRADDAC(a[10], a[11]);
+ SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[15]);
+ SQRADDAC(a[8], a[14]);
+ SQRADDAC(a[9], a[13]);
+ SQRADDAC(a[10], a[12]);
+ SQRADDDB;
+ SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[15]);
+ SQRADDAC(a[9], a[14]);
+ SQRADDAC(a[10], a[13]);
+ SQRADDAC(a[11], a[12]);
+ SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[15]);
+ SQRADDAC(a[10], a[14]);
+ SQRADDAC(a[11], a[13]);
+ SQRADDDB;
+ SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[15]);
+ SQRADDAC(a[11], a[14]);
+ SQRADDAC(a[12], a[13]);
+ SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADD2(a[11], a[15]);
+ SQRADD2(a[12], a[14]);
+ SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADD2(a[12], a[15]);
+ SQRADD2(a[13], a[14]);
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADD2(a[13], a[15]);
+ SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADD2(a[14], a[15]);
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+ COMBA_STORE2(b[31]);
+ COMBA_FINI;
+
+ B->used = 32;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 32 * sizeof(mp_digit));
+ mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_32(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]);
+ SQRADDAC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]);
+ SQRADDAC(a[1], a[8]);
+ SQRADDAC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]);
+ SQRADDAC(a[1], a[9]);
+ SQRADDAC(a[2], a[8]);
+ SQRADDAC(a[3], a[7]);
+ SQRADDAC(a[4], a[6]);
+ SQRADDDB;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]);
+ SQRADDAC(a[1], a[10]);
+ SQRADDAC(a[2], a[9]);
+ SQRADDAC(a[3], a[8]);
+ SQRADDAC(a[4], a[7]);
+ SQRADDAC(a[5], a[6]);
+ SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]);
+ SQRADDAC(a[1], a[11]);
+ SQRADDAC(a[2], a[10]);
+ SQRADDAC(a[3], a[9]);
+ SQRADDAC(a[4], a[8]);
+ SQRADDAC(a[5], a[7]);
+ SQRADDDB;
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]);
+ SQRADDAC(a[1], a[12]);
+ SQRADDAC(a[2], a[11]);
+ SQRADDAC(a[3], a[10]);
+ SQRADDAC(a[4], a[9]);
+ SQRADDAC(a[5], a[8]);
+ SQRADDAC(a[6], a[7]);
+ SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]);
+ SQRADDAC(a[1], a[13]);
+ SQRADDAC(a[2], a[12]);
+ SQRADDAC(a[3], a[11]);
+ SQRADDAC(a[4], a[10]);
+ SQRADDAC(a[5], a[9]);
+ SQRADDAC(a[6], a[8]);
+ SQRADDDB;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]);
+ SQRADDAC(a[1], a[14]);
+ SQRADDAC(a[2], a[13]);
+ SQRADDAC(a[3], a[12]);
+ SQRADDAC(a[4], a[11]);
+ SQRADDAC(a[5], a[10]);
+ SQRADDAC(a[6], a[9]);
+ SQRADDAC(a[7], a[8]);
+ SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[16]);
+ SQRADDAC(a[1], a[15]);
+ SQRADDAC(a[2], a[14]);
+ SQRADDAC(a[3], a[13]);
+ SQRADDAC(a[4], a[12]);
+ SQRADDAC(a[5], a[11]);
+ SQRADDAC(a[6], a[10]);
+ SQRADDAC(a[7], a[9]);
+ SQRADDDB;
+ SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[17]);
+ SQRADDAC(a[1], a[16]);
+ SQRADDAC(a[2], a[15]);
+ SQRADDAC(a[3], a[14]);
+ SQRADDAC(a[4], a[13]);
+ SQRADDAC(a[5], a[12]);
+ SQRADDAC(a[6], a[11]);
+ SQRADDAC(a[7], a[10]);
+ SQRADDAC(a[8], a[9]);
+ SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[18]);
+ SQRADDAC(a[1], a[17]);
+ SQRADDAC(a[2], a[16]);
+ SQRADDAC(a[3], a[15]);
+ SQRADDAC(a[4], a[14]);
+ SQRADDAC(a[5], a[13]);
+ SQRADDAC(a[6], a[12]);
+ SQRADDAC(a[7], a[11]);
+ SQRADDAC(a[8], a[10]);
+ SQRADDDB;
+ SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[19]);
+ SQRADDAC(a[1], a[18]);
+ SQRADDAC(a[2], a[17]);
+ SQRADDAC(a[3], a[16]);
+ SQRADDAC(a[4], a[15]);
+ SQRADDAC(a[5], a[14]);
+ SQRADDAC(a[6], a[13]);
+ SQRADDAC(a[7], a[12]);
+ SQRADDAC(a[8], a[11]);
+ SQRADDAC(a[9], a[10]);
+ SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[20]);
+ SQRADDAC(a[1], a[19]);
+ SQRADDAC(a[2], a[18]);
+ SQRADDAC(a[3], a[17]);
+ SQRADDAC(a[4], a[16]);
+ SQRADDAC(a[5], a[15]);
+ SQRADDAC(a[6], a[14]);
+ SQRADDAC(a[7], a[13]);
+ SQRADDAC(a[8], a[12]);
+ SQRADDAC(a[9], a[11]);
+ SQRADDDB;
+ SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[21]);
+ SQRADDAC(a[1], a[20]);
+ SQRADDAC(a[2], a[19]);
+ SQRADDAC(a[3], a[18]);
+ SQRADDAC(a[4], a[17]);
+ SQRADDAC(a[5], a[16]);
+ SQRADDAC(a[6], a[15]);
+ SQRADDAC(a[7], a[14]);
+ SQRADDAC(a[8], a[13]);
+ SQRADDAC(a[9], a[12]);
+ SQRADDAC(a[10], a[11]);
+ SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[22]);
+ SQRADDAC(a[1], a[21]);
+ SQRADDAC(a[2], a[20]);
+ SQRADDAC(a[3], a[19]);
+ SQRADDAC(a[4], a[18]);
+ SQRADDAC(a[5], a[17]);
+ SQRADDAC(a[6], a[16]);
+ SQRADDAC(a[7], a[15]);
+ SQRADDAC(a[8], a[14]);
+ SQRADDAC(a[9], a[13]);
+ SQRADDAC(a[10], a[12]);
+ SQRADDDB;
+ SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[23]);
+ SQRADDAC(a[1], a[22]);
+ SQRADDAC(a[2], a[21]);
+ SQRADDAC(a[3], a[20]);
+ SQRADDAC(a[4], a[19]);
+ SQRADDAC(a[5], a[18]);
+ SQRADDAC(a[6], a[17]);
+ SQRADDAC(a[7], a[16]);
+ SQRADDAC(a[8], a[15]);
+ SQRADDAC(a[9], a[14]);
+ SQRADDAC(a[10], a[13]);
+ SQRADDAC(a[11], a[12]);
+ SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[24]);
+ SQRADDAC(a[1], a[23]);
+ SQRADDAC(a[2], a[22]);
+ SQRADDAC(a[3], a[21]);
+ SQRADDAC(a[4], a[20]);
+ SQRADDAC(a[5], a[19]);
+ SQRADDAC(a[6], a[18]);
+ SQRADDAC(a[7], a[17]);
+ SQRADDAC(a[8], a[16]);
+ SQRADDAC(a[9], a[15]);
+ SQRADDAC(a[10], a[14]);
+ SQRADDAC(a[11], a[13]);
+ SQRADDDB;
+ SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[25]);
+ SQRADDAC(a[1], a[24]);
+ SQRADDAC(a[2], a[23]);
+ SQRADDAC(a[3], a[22]);
+ SQRADDAC(a[4], a[21]);
+ SQRADDAC(a[5], a[20]);
+ SQRADDAC(a[6], a[19]);
+ SQRADDAC(a[7], a[18]);
+ SQRADDAC(a[8], a[17]);
+ SQRADDAC(a[9], a[16]);
+ SQRADDAC(a[10], a[15]);
+ SQRADDAC(a[11], a[14]);
+ SQRADDAC(a[12], a[13]);
+ SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[26]);
+ SQRADDAC(a[1], a[25]);
+ SQRADDAC(a[2], a[24]);
+ SQRADDAC(a[3], a[23]);
+ SQRADDAC(a[4], a[22]);
+ SQRADDAC(a[5], a[21]);
+ SQRADDAC(a[6], a[20]);
+ SQRADDAC(a[7], a[19]);
+ SQRADDAC(a[8], a[18]);
+ SQRADDAC(a[9], a[17]);
+ SQRADDAC(a[10], a[16]);
+ SQRADDAC(a[11], a[15]);
+ SQRADDAC(a[12], a[14]);
+ SQRADDDB;
+ SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[27]);
+ SQRADDAC(a[1], a[26]);
+ SQRADDAC(a[2], a[25]);
+ SQRADDAC(a[3], a[24]);
+ SQRADDAC(a[4], a[23]);
+ SQRADDAC(a[5], a[22]);
+ SQRADDAC(a[6], a[21]);
+ SQRADDAC(a[7], a[20]);
+ SQRADDAC(a[8], a[19]);
+ SQRADDAC(a[9], a[18]);
+ SQRADDAC(a[10], a[17]);
+ SQRADDAC(a[11], a[16]);
+ SQRADDAC(a[12], a[15]);
+ SQRADDAC(a[13], a[14]);
+ SQRADDDB;
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[28]);
+ SQRADDAC(a[1], a[27]);
+ SQRADDAC(a[2], a[26]);
+ SQRADDAC(a[3], a[25]);
+ SQRADDAC(a[4], a[24]);
+ SQRADDAC(a[5], a[23]);
+ SQRADDAC(a[6], a[22]);
+ SQRADDAC(a[7], a[21]);
+ SQRADDAC(a[8], a[20]);
+ SQRADDAC(a[9], a[19]);
+ SQRADDAC(a[10], a[18]);
+ SQRADDAC(a[11], a[17]);
+ SQRADDAC(a[12], a[16]);
+ SQRADDAC(a[13], a[15]);
+ SQRADDDB;
+ SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[29]);
+ SQRADDAC(a[1], a[28]);
+ SQRADDAC(a[2], a[27]);
+ SQRADDAC(a[3], a[26]);
+ SQRADDAC(a[4], a[25]);
+ SQRADDAC(a[5], a[24]);
+ SQRADDAC(a[6], a[23]);
+ SQRADDAC(a[7], a[22]);
+ SQRADDAC(a[8], a[21]);
+ SQRADDAC(a[9], a[20]);
+ SQRADDAC(a[10], a[19]);
+ SQRADDAC(a[11], a[18]);
+ SQRADDAC(a[12], a[17]);
+ SQRADDAC(a[13], a[16]);
+ SQRADDAC(a[14], a[15]);
+ SQRADDDB;
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[30]);
+ SQRADDAC(a[1], a[29]);
+ SQRADDAC(a[2], a[28]);
+ SQRADDAC(a[3], a[27]);
+ SQRADDAC(a[4], a[26]);
+ SQRADDAC(a[5], a[25]);
+ SQRADDAC(a[6], a[24]);
+ SQRADDAC(a[7], a[23]);
+ SQRADDAC(a[8], a[22]);
+ SQRADDAC(a[9], a[21]);
+ SQRADDAC(a[10], a[20]);
+ SQRADDAC(a[11], a[19]);
+ SQRADDAC(a[12], a[18]);
+ SQRADDAC(a[13], a[17]);
+ SQRADDAC(a[14], a[16]);
+ SQRADDDB;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+
+ /* output 31 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[31]);
+ SQRADDAC(a[1], a[30]);
+ SQRADDAC(a[2], a[29]);
+ SQRADDAC(a[3], a[28]);
+ SQRADDAC(a[4], a[27]);
+ SQRADDAC(a[5], a[26]);
+ SQRADDAC(a[6], a[25]);
+ SQRADDAC(a[7], a[24]);
+ SQRADDAC(a[8], a[23]);
+ SQRADDAC(a[9], a[22]);
+ SQRADDAC(a[10], a[21]);
+ SQRADDAC(a[11], a[20]);
+ SQRADDAC(a[12], a[19]);
+ SQRADDAC(a[13], a[18]);
+ SQRADDAC(a[14], a[17]);
+ SQRADDAC(a[15], a[16]);
+ SQRADDDB;
+ COMBA_STORE(b[31]);
+
+ /* output 32 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[31]);
+ SQRADDAC(a[2], a[30]);
+ SQRADDAC(a[3], a[29]);
+ SQRADDAC(a[4], a[28]);
+ SQRADDAC(a[5], a[27]);
+ SQRADDAC(a[6], a[26]);
+ SQRADDAC(a[7], a[25]);
+ SQRADDAC(a[8], a[24]);
+ SQRADDAC(a[9], a[23]);
+ SQRADDAC(a[10], a[22]);
+ SQRADDAC(a[11], a[21]);
+ SQRADDAC(a[12], a[20]);
+ SQRADDAC(a[13], a[19]);
+ SQRADDAC(a[14], a[18]);
+ SQRADDAC(a[15], a[17]);
+ SQRADDDB;
+ SQRADD(a[16], a[16]);
+ COMBA_STORE(b[32]);
+
+ /* output 33 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[31]);
+ SQRADDAC(a[3], a[30]);
+ SQRADDAC(a[4], a[29]);
+ SQRADDAC(a[5], a[28]);
+ SQRADDAC(a[6], a[27]);
+ SQRADDAC(a[7], a[26]);
+ SQRADDAC(a[8], a[25]);
+ SQRADDAC(a[9], a[24]);
+ SQRADDAC(a[10], a[23]);
+ SQRADDAC(a[11], a[22]);
+ SQRADDAC(a[12], a[21]);
+ SQRADDAC(a[13], a[20]);
+ SQRADDAC(a[14], a[19]);
+ SQRADDAC(a[15], a[18]);
+ SQRADDAC(a[16], a[17]);
+ SQRADDDB;
+ COMBA_STORE(b[33]);
+
+ /* output 34 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[31]);
+ SQRADDAC(a[4], a[30]);
+ SQRADDAC(a[5], a[29]);
+ SQRADDAC(a[6], a[28]);
+ SQRADDAC(a[7], a[27]);
+ SQRADDAC(a[8], a[26]);
+ SQRADDAC(a[9], a[25]);
+ SQRADDAC(a[10], a[24]);
+ SQRADDAC(a[11], a[23]);
+ SQRADDAC(a[12], a[22]);
+ SQRADDAC(a[13], a[21]);
+ SQRADDAC(a[14], a[20]);
+ SQRADDAC(a[15], a[19]);
+ SQRADDAC(a[16], a[18]);
+ SQRADDDB;
+ SQRADD(a[17], a[17]);
+ COMBA_STORE(b[34]);
+
+ /* output 35 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[31]);
+ SQRADDAC(a[5], a[30]);
+ SQRADDAC(a[6], a[29]);
+ SQRADDAC(a[7], a[28]);
+ SQRADDAC(a[8], a[27]);
+ SQRADDAC(a[9], a[26]);
+ SQRADDAC(a[10], a[25]);
+ SQRADDAC(a[11], a[24]);
+ SQRADDAC(a[12], a[23]);
+ SQRADDAC(a[13], a[22]);
+ SQRADDAC(a[14], a[21]);
+ SQRADDAC(a[15], a[20]);
+ SQRADDAC(a[16], a[19]);
+ SQRADDAC(a[17], a[18]);
+ SQRADDDB;
+ COMBA_STORE(b[35]);
+
+ /* output 36 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[31]);
+ SQRADDAC(a[6], a[30]);
+ SQRADDAC(a[7], a[29]);
+ SQRADDAC(a[8], a[28]);
+ SQRADDAC(a[9], a[27]);
+ SQRADDAC(a[10], a[26]);
+ SQRADDAC(a[11], a[25]);
+ SQRADDAC(a[12], a[24]);
+ SQRADDAC(a[13], a[23]);
+ SQRADDAC(a[14], a[22]);
+ SQRADDAC(a[15], a[21]);
+ SQRADDAC(a[16], a[20]);
+ SQRADDAC(a[17], a[19]);
+ SQRADDDB;
+ SQRADD(a[18], a[18]);
+ COMBA_STORE(b[36]);
+
+ /* output 37 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[31]);
+ SQRADDAC(a[7], a[30]);
+ SQRADDAC(a[8], a[29]);
+ SQRADDAC(a[9], a[28]);
+ SQRADDAC(a[10], a[27]);
+ SQRADDAC(a[11], a[26]);
+ SQRADDAC(a[12], a[25]);
+ SQRADDAC(a[13], a[24]);
+ SQRADDAC(a[14], a[23]);
+ SQRADDAC(a[15], a[22]);
+ SQRADDAC(a[16], a[21]);
+ SQRADDAC(a[17], a[20]);
+ SQRADDAC(a[18], a[19]);
+ SQRADDDB;
+ COMBA_STORE(b[37]);
+
+ /* output 38 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[31]);
+ SQRADDAC(a[8], a[30]);
+ SQRADDAC(a[9], a[29]);
+ SQRADDAC(a[10], a[28]);
+ SQRADDAC(a[11], a[27]);
+ SQRADDAC(a[12], a[26]);
+ SQRADDAC(a[13], a[25]);
+ SQRADDAC(a[14], a[24]);
+ SQRADDAC(a[15], a[23]);
+ SQRADDAC(a[16], a[22]);
+ SQRADDAC(a[17], a[21]);
+ SQRADDAC(a[18], a[20]);
+ SQRADDDB;
+ SQRADD(a[19], a[19]);
+ COMBA_STORE(b[38]);
+
+ /* output 39 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[31]);
+ SQRADDAC(a[9], a[30]);
+ SQRADDAC(a[10], a[29]);
+ SQRADDAC(a[11], a[28]);
+ SQRADDAC(a[12], a[27]);
+ SQRADDAC(a[13], a[26]);
+ SQRADDAC(a[14], a[25]);
+ SQRADDAC(a[15], a[24]);
+ SQRADDAC(a[16], a[23]);
+ SQRADDAC(a[17], a[22]);
+ SQRADDAC(a[18], a[21]);
+ SQRADDAC(a[19], a[20]);
+ SQRADDDB;
+ COMBA_STORE(b[39]);
+
+ /* output 40 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[31]);
+ SQRADDAC(a[10], a[30]);
+ SQRADDAC(a[11], a[29]);
+ SQRADDAC(a[12], a[28]);
+ SQRADDAC(a[13], a[27]);
+ SQRADDAC(a[14], a[26]);
+ SQRADDAC(a[15], a[25]);
+ SQRADDAC(a[16], a[24]);
+ SQRADDAC(a[17], a[23]);
+ SQRADDAC(a[18], a[22]);
+ SQRADDAC(a[19], a[21]);
+ SQRADDDB;
+ SQRADD(a[20], a[20]);
+ COMBA_STORE(b[40]);
+
+ /* output 41 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[31]);
+ SQRADDAC(a[11], a[30]);
+ SQRADDAC(a[12], a[29]);
+ SQRADDAC(a[13], a[28]);
+ SQRADDAC(a[14], a[27]);
+ SQRADDAC(a[15], a[26]);
+ SQRADDAC(a[16], a[25]);
+ SQRADDAC(a[17], a[24]);
+ SQRADDAC(a[18], a[23]);
+ SQRADDAC(a[19], a[22]);
+ SQRADDAC(a[20], a[21]);
+ SQRADDDB;
+ COMBA_STORE(b[41]);
+
+ /* output 42 */
+ CARRY_FORWARD;
+ SQRADDSC(a[11], a[31]);
+ SQRADDAC(a[12], a[30]);
+ SQRADDAC(a[13], a[29]);
+ SQRADDAC(a[14], a[28]);
+ SQRADDAC(a[15], a[27]);
+ SQRADDAC(a[16], a[26]);
+ SQRADDAC(a[17], a[25]);
+ SQRADDAC(a[18], a[24]);
+ SQRADDAC(a[19], a[23]);
+ SQRADDAC(a[20], a[22]);
+ SQRADDDB;
+ SQRADD(a[21], a[21]);
+ COMBA_STORE(b[42]);
+
+ /* output 43 */
+ CARRY_FORWARD;
+ SQRADDSC(a[12], a[31]);
+ SQRADDAC(a[13], a[30]);
+ SQRADDAC(a[14], a[29]);
+ SQRADDAC(a[15], a[28]);
+ SQRADDAC(a[16], a[27]);
+ SQRADDAC(a[17], a[26]);
+ SQRADDAC(a[18], a[25]);
+ SQRADDAC(a[19], a[24]);
+ SQRADDAC(a[20], a[23]);
+ SQRADDAC(a[21], a[22]);
+ SQRADDDB;
+ COMBA_STORE(b[43]);
+
+ /* output 44 */
+ CARRY_FORWARD;
+ SQRADDSC(a[13], a[31]);
+ SQRADDAC(a[14], a[30]);
+ SQRADDAC(a[15], a[29]);
+ SQRADDAC(a[16], a[28]);
+ SQRADDAC(a[17], a[27]);
+ SQRADDAC(a[18], a[26]);
+ SQRADDAC(a[19], a[25]);
+ SQRADDAC(a[20], a[24]);
+ SQRADDAC(a[21], a[23]);
+ SQRADDDB;
+ SQRADD(a[22], a[22]);
+ COMBA_STORE(b[44]);
+
+ /* output 45 */
+ CARRY_FORWARD;
+ SQRADDSC(a[14], a[31]);
+ SQRADDAC(a[15], a[30]);
+ SQRADDAC(a[16], a[29]);
+ SQRADDAC(a[17], a[28]);
+ SQRADDAC(a[18], a[27]);
+ SQRADDAC(a[19], a[26]);
+ SQRADDAC(a[20], a[25]);
+ SQRADDAC(a[21], a[24]);
+ SQRADDAC(a[22], a[23]);
+ SQRADDDB;
+ COMBA_STORE(b[45]);
+
+ /* output 46 */
+ CARRY_FORWARD;
+ SQRADDSC(a[15], a[31]);
+ SQRADDAC(a[16], a[30]);
+ SQRADDAC(a[17], a[29]);
+ SQRADDAC(a[18], a[28]);
+ SQRADDAC(a[19], a[27]);
+ SQRADDAC(a[20], a[26]);
+ SQRADDAC(a[21], a[25]);
+ SQRADDAC(a[22], a[24]);
+ SQRADDDB;
+ SQRADD(a[23], a[23]);
+ COMBA_STORE(b[46]);
+
+ /* output 47 */
+ CARRY_FORWARD;
+ SQRADDSC(a[16], a[31]);
+ SQRADDAC(a[17], a[30]);
+ SQRADDAC(a[18], a[29]);
+ SQRADDAC(a[19], a[28]);
+ SQRADDAC(a[20], a[27]);
+ SQRADDAC(a[21], a[26]);
+ SQRADDAC(a[22], a[25]);
+ SQRADDAC(a[23], a[24]);
+ SQRADDDB;
+ COMBA_STORE(b[47]);
+
+ /* output 48 */
+ CARRY_FORWARD;
+ SQRADDSC(a[17], a[31]);
+ SQRADDAC(a[18], a[30]);
+ SQRADDAC(a[19], a[29]);
+ SQRADDAC(a[20], a[28]);
+ SQRADDAC(a[21], a[27]);
+ SQRADDAC(a[22], a[26]);
+ SQRADDAC(a[23], a[25]);
+ SQRADDDB;
+ SQRADD(a[24], a[24]);
+ COMBA_STORE(b[48]);
+
+ /* output 49 */
+ CARRY_FORWARD;
+ SQRADDSC(a[18], a[31]);
+ SQRADDAC(a[19], a[30]);
+ SQRADDAC(a[20], a[29]);
+ SQRADDAC(a[21], a[28]);
+ SQRADDAC(a[22], a[27]);
+ SQRADDAC(a[23], a[26]);
+ SQRADDAC(a[24], a[25]);
+ SQRADDDB;
+ COMBA_STORE(b[49]);
+
+ /* output 50 */
+ CARRY_FORWARD;
+ SQRADDSC(a[19], a[31]);
+ SQRADDAC(a[20], a[30]);
+ SQRADDAC(a[21], a[29]);
+ SQRADDAC(a[22], a[28]);
+ SQRADDAC(a[23], a[27]);
+ SQRADDAC(a[24], a[26]);
+ SQRADDDB;
+ SQRADD(a[25], a[25]);
+ COMBA_STORE(b[50]);
+
+ /* output 51 */
+ CARRY_FORWARD;
+ SQRADDSC(a[20], a[31]);
+ SQRADDAC(a[21], a[30]);
+ SQRADDAC(a[22], a[29]);
+ SQRADDAC(a[23], a[28]);
+ SQRADDAC(a[24], a[27]);
+ SQRADDAC(a[25], a[26]);
+ SQRADDDB;
+ COMBA_STORE(b[51]);
+
+ /* output 52 */
+ CARRY_FORWARD;
+ SQRADDSC(a[21], a[31]);
+ SQRADDAC(a[22], a[30]);
+ SQRADDAC(a[23], a[29]);
+ SQRADDAC(a[24], a[28]);
+ SQRADDAC(a[25], a[27]);
+ SQRADDDB;
+ SQRADD(a[26], a[26]);
+ COMBA_STORE(b[52]);
+
+ /* output 53 */
+ CARRY_FORWARD;
+ SQRADDSC(a[22], a[31]);
+ SQRADDAC(a[23], a[30]);
+ SQRADDAC(a[24], a[29]);
+ SQRADDAC(a[25], a[28]);
+ SQRADDAC(a[26], a[27]);
+ SQRADDDB;
+ COMBA_STORE(b[53]);
+
+ /* output 54 */
+ CARRY_FORWARD;
+ SQRADDSC(a[23], a[31]);
+ SQRADDAC(a[24], a[30]);
+ SQRADDAC(a[25], a[29]);
+ SQRADDAC(a[26], a[28]);
+ SQRADDDB;
+ SQRADD(a[27], a[27]);
+ COMBA_STORE(b[54]);
+
+ /* output 55 */
+ CARRY_FORWARD;
+ SQRADDSC(a[24], a[31]);
+ SQRADDAC(a[25], a[30]);
+ SQRADDAC(a[26], a[29]);
+ SQRADDAC(a[27], a[28]);
+ SQRADDDB;
+ COMBA_STORE(b[55]);
+
+ /* output 56 */
+ CARRY_FORWARD;
+ SQRADDSC(a[25], a[31]);
+ SQRADDAC(a[26], a[30]);
+ SQRADDAC(a[27], a[29]);
+ SQRADDDB;
+ SQRADD(a[28], a[28]);
+ COMBA_STORE(b[56]);
+
+ /* output 57 */
+ CARRY_FORWARD;
+ SQRADDSC(a[26], a[31]);
+ SQRADDAC(a[27], a[30]);
+ SQRADDAC(a[28], a[29]);
+ SQRADDDB;
+ COMBA_STORE(b[57]);
+
+ /* output 58 */
+ CARRY_FORWARD;
+ SQRADD2(a[27], a[31]);
+ SQRADD2(a[28], a[30]);
+ SQRADD(a[29], a[29]);
+ COMBA_STORE(b[58]);
+
+ /* output 59 */
+ CARRY_FORWARD;
+ SQRADD2(a[28], a[31]);
+ SQRADD2(a[29], a[30]);
+ COMBA_STORE(b[59]);
+
+ /* output 60 */
+ CARRY_FORWARD;
+ SQRADD2(a[29], a[31]);
+ SQRADD(a[30], a[30]);
+ COMBA_STORE(b[60]);
+
+ /* output 61 */
+ CARRY_FORWARD;
+ SQRADD2(a[30], a[31]);
+ COMBA_STORE(b[61]);
+
+ /* output 62 */
+ CARRY_FORWARD;
+ SQRADD(a[31], a[31]);
+ COMBA_STORE(b[62]);
+ COMBA_STORE2(b[63]);
+ COMBA_FINI;
+
+ B->used = 64;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 64 * sizeof(mp_digit));
+ mp_clamp(B);
+}
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
new file mode 100644
index 0000000000..e50efa8de3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
@@ -0,0 +1,13066 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;/* TomsFastMath, a fast ISO C bignum library.
+; *
+; * This project is meant to fill in where LibTomMath
+; * falls short. That is speed ;-)
+; *
+; * This project is public domain and free for all purposes.
+; *
+; * Tom St Denis, tomstdenis@iahu.ca
+; */
+
+;/*
+; * The source file from which this assembly was derived
+; * comes from TFM v0.03, which has the above license.
+; * This source was derived from mp_comba_amd64_sun.s and converted
+; * to MASM syntax.
+; */
+
+.CODE
+
+externdef memcpy:PROC
+
+public s_mp_mul_comba_4
+public s_mp_mul_comba_8
+public s_mp_mul_comba_16
+public s_mp_mul_comba_32
+public s_mp_sqr_comba_8
+public s_mp_sqr_comba_16
+public s_mp_sqr_comba_32
+
+
+; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+
+ ALIGN 16
+s_mp_mul_comba_4 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ sub rsp, 64
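+ ; Copy the four digits of each operand from A->dp and B->dp into the
+ ; local stack frame, so the multiply-accumulate chain below can fetch
+ ; them with fixed offsets from rsp.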
+ mov r9, qword ptr [16+rdi]
+ mov rbx, rdx
+ mov rdx, qword ptr [16+rsi]
+ mov rax, qword ptr [r9]
+ mov qword ptr [-64+64+rsp], rax
+ mov r8, qword ptr [8+r9]
+ mov qword ptr [-56+64+rsp], r8
+ mov rbp, qword ptr [16+r9]
+ mov qword ptr [-48+64+rsp], rbp
+ mov r12, qword ptr [24+r9]
+ mov qword ptr [-40+64+rsp], r12
+ mov rcx, qword ptr [rdx]
+ mov qword ptr [-32+64+rsp], rcx
+ mov r10, qword ptr [8+rdx]
+ mov qword ptr [-24+64+rsp], r10
+ mov r11, qword ptr [16+rdx]
+ xor r10d, r10d
+ mov r8, r10
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [-16+64+rsp], r11
+ mov r11, qword ptr [16+rbx]
+ mov rax, qword ptr [24+rdx]
+ mov qword ptr [-8+64+rsp], rax
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8
+ mov r8, rbp
+ mov rbp, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [48+r11], r8
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [56+r11], rcx
+ mov dword ptr [8+rbx], 8
+ jne L9
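+; Inline mp_clamp: strip leading zero digits from the 8-digit result,
+; then clear the sign if the value clamped to zero.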
+ ALIGN 16
+L18:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L9
+ lea r10d, dword ptr [-2+rdx]
+ cmp qword ptr [r11+r10*8], 0
+ je L18
+L9:
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi
+ mov dword ptr [rbx], r11d
+ add rsp, 64
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_4 ENDP
+
+
+; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+
+ ALIGN 16
+s_mp_mul_comba_8 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ mov rbx, rdx
+ sub rsp, 8+128
+ mov rdx, qword ptr [16+rdi]
+ mov r8, qword ptr [rdx]
+ mov qword ptr [-120+128+rsp], r8
+ mov rbp, qword ptr [8+rdx]
+ mov qword ptr [-112+128+rsp], rbp
+ mov r9, qword ptr [16+rdx]
+ mov qword ptr [-104+128+rsp], r9
+ mov r12, qword ptr [24+rdx]
+ mov qword ptr [-96+128+rsp], r12
+ mov rcx, qword ptr [32+rdx]
+ mov qword ptr [-88+128+rsp], rcx
+ mov r10, qword ptr [40+rdx]
+ mov qword ptr [-80+128+rsp], r10
+ mov r11, qword ptr [48+rdx]
+ mov qword ptr [-72+128+rsp], r11
+ mov rax, qword ptr [56+rdx]
+ mov rdx, qword ptr [16+rsi]
+ mov qword ptr [-64+128+rsp], rax
+ mov r8, qword ptr [rdx]
+ mov qword ptr [-56+128+rsp], r8
+ mov rbp, qword ptr [8+rdx]
+ mov qword ptr [-48+128+rsp], rbp
+ mov r9, qword ptr [16+rdx]
+ mov qword ptr [-40+128+rsp], r9
+ mov r12, qword ptr [24+rdx]
+ mov qword ptr [-32+128+rsp], r12
+ mov rcx, qword ptr [32+rdx]
+ mov qword ptr [-24+128+rsp], rcx
+ mov r10, qword ptr [40+rdx]
+ mov qword ptr [-16+128+rsp], r10
+ mov r11, qword ptr [48+rdx]
+ xor r10d, r10d
+ mov r8, r10
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [-8+128+rsp], r11
+ mov r11, qword ptr [16+rbx]
+ mov rax, qword ptr [56+rdx]
+ mov qword ptr [128+rsp], rax
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8
+ mov r8, rbp
+ mov rbp, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [48+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [56+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [64+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [72+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [80+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [88+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [96+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [104+r11], rcx
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [112+r11], r8
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [120+r11], rcx
+ mov dword ptr [8+rbx], 16
+ jne L35
+ ALIGN 16
+L43:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L35
+ lea eax, dword ptr [-2+rdx]
+ cmp qword ptr [r11+rax*8], 0
+ je L43
+L35:
+ mov r11d, dword ptr [8+rbx]
+ xor edx, edx
+ test r11d, r11d
+ cmovne edx, esi
+ mov dword ptr [rbx], edx
+ add rsp, 8+128
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_8 ENDP
+
+
+; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+
+ ALIGN 16
+s_mp_mul_comba_16 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ mov rbx, rdx
+ sub rsp, 136+128
+ mov rax, qword ptr [16+rdi]
+ mov r8, qword ptr [rax]
+ mov qword ptr [-120+128+rsp], r8
+ mov rbp, qword ptr [8+rax]
+ mov qword ptr [-112+128+rsp], rbp
+ mov r9, qword ptr [16+rax]
+ mov qword ptr [-104+128+rsp], r9
+ mov r12, qword ptr [24+rax]
+ mov qword ptr [-96+128+rsp], r12
+ mov rcx, qword ptr [32+rax]
+ mov qword ptr [-88+128+rsp], rcx
+ mov r10, qword ptr [40+rax]
+ mov qword ptr [-80+128+rsp], r10
+ mov rdx, qword ptr [48+rax]
+ mov qword ptr [-72+128+rsp], rdx
+ mov r11, qword ptr [56+rax]
+ mov qword ptr [-64+128+rsp], r11
+ mov r8, qword ptr [64+rax]
+ mov qword ptr [-56+128+rsp], r8
+ mov rbp, qword ptr [72+rax]
+ mov qword ptr [-48+128+rsp], rbp
+ mov r9, qword ptr [80+rax]
+ mov qword ptr [-40+128+rsp], r9
+ mov r12, qword ptr [88+rax]
+ mov qword ptr [-32+128+rsp], r12
+ mov rcx, qword ptr [96+rax]
+ mov qword ptr [-24+128+rsp], rcx
+ mov r10, qword ptr [104+rax]
+ mov qword ptr [-16+128+rsp], r10
+ mov rdx, qword ptr [112+rax]
+ mov qword ptr [-8+128+rsp], rdx
+ mov r11, qword ptr [120+rax]
+ mov qword ptr [128+rsp], r11
+ mov r11, qword ptr [16+rsi]
+ mov r8, qword ptr [r11]
+ mov qword ptr [8+128+rsp], r8
+ mov rbp, qword ptr [8+r11]
+ mov qword ptr [16+128+rsp], rbp
+ mov r9, qword ptr [16+r11]
+ mov qword ptr [24+128+rsp], r9
+ mov r12, qword ptr [24+r11]
+ mov qword ptr [32+128+rsp], r12
+ mov rcx, qword ptr [32+r11]
+ mov qword ptr [40+128+rsp], rcx
+ mov r10, qword ptr [40+r11]
+ mov qword ptr [48+128+rsp], r10
+ mov rdx, qword ptr [48+r11]
+ mov qword ptr [56+128+rsp], rdx
+ mov rax, qword ptr [56+r11]
+ mov qword ptr [64+128+rsp], rax
+ mov r8, qword ptr [64+r11]
+ mov qword ptr [72+128+rsp], r8
+ mov rbp, qword ptr [72+r11]
+ mov qword ptr [80+128+rsp], rbp
+ mov r9, qword ptr [80+r11]
+ mov qword ptr [88+128+rsp], r9
+ mov r12, qword ptr [88+r11]
+ mov qword ptr [96+128+rsp], r12
+ mov rcx, qword ptr [96+r11]
+ mov qword ptr [104+128+rsp], rcx
+ mov r10, qword ptr [104+r11]
+ mov qword ptr [112+128+rsp], r10
+ mov rdx, qword ptr [112+r11]
+ xor r10d, r10d
+ mov r8, r10
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [120+128+rsp], rdx
+ mov rax, qword ptr [120+r11]
+ mov qword ptr [128+128+rsp], rax
+ mov r11, qword ptr [16+rbx]
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8
+ mov r8, rbp
+ mov rbp, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [48+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [56+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [64+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [72+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [80+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [88+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [96+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [104+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [112+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [120+r11], rcx
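+ ; Columns 0..15 are stored; from here the inner sums shorten again as
+ ; the low digits of A are exhausted (column k starts at a[k-15]*b[15]).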
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [128+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [136+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [144+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [152+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [160+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [168+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [176+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [184+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [192+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [200+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [208+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [216+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [224+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [232+r11], rcx
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [240+r11], r8
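+ ; Digits 0..30 are stored; rcx holds the final carry, written as digit
+ ; 31 below. Then fix up the result header: the sign is
+ ; sign(A) XOR sign(B), used starts at 32, and if the top digit is zero
+ ; the loop at L84 trims leading zero digits.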
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [248+r11], rcx
+ mov dword ptr [8+rbx], 32
+ jne L76
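+ ; Top digit was zero: walk down from the top, decrementing C->used
+ ; while the highest remaining digit is zero.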
+ ALIGN 16
+L84:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L76
+ lea eax, dword ptr [-2+rdx]
+ cmp qword ptr [r11+rax*8], 0
+ je L84
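+ ; Set C->sign: the computed sign when any digits remain, 0 when the
+ ; result is zero.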
+L76:
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi
+ mov dword ptr [rbx], r11d
+ add rsp, 136+128
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_16 ENDP
+
+; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+
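+; Same column-at-a-time scheme as s_mp_mul_comba_16, widened to 32-digit
+; operands: 63 columns plus a final carry digit. This variant keeps rbp
+; as a frame pointer and stages the two 256-byte digit arrays with
+; memcpy instead of inline loads.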
+ ALIGN 16
+s_mp_mul_comba_32 PROC ; a "FRAME" function: rbp is kept as a frame pointer
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push rbp
+ mov rbp, rsp
+ push r13
+ mov r13, rdx
+; mov edx, 256
+ mov r8d, 256
+ push r12
+ mov r12, rsi
+ push rbx
+ mov rbx, rdi
+ sub rsp, 520+32 ; +32 for "home" storage
+; mov rsi, qword ptr [16+rdi]
+; lea rdi, qword ptr [-544+rbp]
+ mov rdx, qword ptr [16+rdi]
+ lea rcx, qword ptr [-544+rbp]
+ call memcpy
+; mov rsi, qword ptr [16+r12]
+; lea rdi, qword ptr [-288+rbp]
+; mov edx, 256
+ mov rdx, qword ptr [16+r12]
+ lea rcx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
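+ ; A's 32 digits are now staged at [-544+rbp..-296+rbp] and B's at
+ ; [-288+rbp..-40+rbp]; the Win64 memcpy calls take rcx=dst, rdx=src,
+ ; r8=count, replacing the commented-out System V sequences above.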
+ mov r9, qword ptr [16+r13]
+ xor r8d, r8d
+ mov rsi, r8
+ mov rdi, r8
+ mov r10, r8
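+ ; r8 stays zero for reseeding; rsi/rdi plus a rotating third register
+ ; (r10/rcx/r11) form the 192-bit column accumulator, as in the
+ ; 16-digit routine. Column 0 follows.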
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [r9], rsi
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-280+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov r11, r10
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-288+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r11, 0
+ mov qword ptr [8+r9], rdi
+ mov rdi, r11
+ mov r11, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rcx, r11
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [16+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [24+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [32+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [40+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [48+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [56+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [64+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [72+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [80+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [88+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [96+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [104+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [112+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [120+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [128+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [136+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [144+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [152+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [160+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [168+r9], rcx
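+ ; column 22: a[0..22] * b[22..0] (23 products) -> [176+r9]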
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [176+r9], rsi
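+ ; column 23: a[0..23] * b[23..0] (24 products) -> [184+r9]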
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [184+r9], rcx
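+ ; column 24: a[0..24] * b[24..0] (25 products) -> [192+r9]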
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [192+r9], rsi
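+ ; column 25: a[0..25] * b[25..0] (26 products) -> [200+r9]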
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [200+r9], rcx
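+ ; column 26: a[0..26] * b[26..0] (27 products) -> [208+r9]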
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [208+r9], rsi
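+ ; column 27: a[0..27] * b[27..0] (28 products) -> [216+r9]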
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [216+r9], rcx
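+ ; column 28: a[0..28] * b[28..0] (29 products) -> [224+r9]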
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [224+r9], rsi
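+ ; column 29: a[0..29] * b[29..0] (30 products) -> [232+r9]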
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [232+r9], rcx
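+ ; column 30: a[0..30] * b[30..0] (31 products) -> [240+r9]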
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [240+r9], rsi
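+ ; column 31: a[0..31] * b[31..0] (32 products) -> [248+r9]; the widest
+ ; column, where every limb of a pairs with a limb of b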
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [248+r9], rcx
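+ ; past the main diagonal of the Comba triangle: from here the lowest
+ ; a-index rises by one per column and each column is one product shorter
+ ; column 32: a[1..31] * b[31..1] (31 products) -> [256+r9]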
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [256+r9], rsi
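+ ; column 33: a[2..31] * b[31..2] (30 products) -> [264+r9]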
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [264+r9], rcx
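+ ; column 34: a[3..31] * b[31..3] (29 products) -> [272+r9]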
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [272+r9], rsi
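+ ; column 35: a[4..31] * b[31..4] (28 products) -> [280+r9]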
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [280+r9], rcx
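+ ; column 36: a[5..31] * b[31..5] (27 products) -> [288+r9]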
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [288+r9], rsi
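+ ; column 37: a[6..31] * b[31..6] (26 products) -> [296+r9]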
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [296+r9], rcx
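+ ; column 38: a[7..31] * b[31..7] (25 products) -> [304+r9]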
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [304+r9], rsi
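+ ; column 39: a[8..31] * b[31..8] (24 products) -> [312+r9]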
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [312+r9], rcx
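+ ; column 40: a[9..31] * b[31..9] (23 products) -> [320+r9]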
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [320+r9], rsi
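+ ; column 41: a[10..31] * b[31..10] (22 products) -> [328+r9]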
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [328+r9], rcx
+ mov rdi, r11
+ mov r11, r10
+ mov r10, r8
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-40+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-48+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-56+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-64+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-72+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-80+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-88+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-96+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-104+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-112+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-120+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-128+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-136+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-144+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-152+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-160+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-168+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-176+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-184+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-192+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-200+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [336+r9], r11
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-40+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov rcx, r10
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-48+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-56+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-64+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-72+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-80+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-88+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-96+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-104+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-112+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-120+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-128+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-136+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-144+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-152+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-160+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-168+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-176+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-184+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov r11, rsi
+ mov r10, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-192+rbp]
+ add rdi, rax
+ adc r11, rdx
+ adc r10, 0
+ mov qword ptr [344+r9], rdi
+ mov rcx, r11
+ mov rdi, r10
+ mov r11, r8
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rsi, r11
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [352+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [360+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [368+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [376+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [384+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [392+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [400+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [408+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [416+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [424+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [432+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [440+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [448+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [456+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [464+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [472+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [480+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r11, rcx
+ mov r10, rdi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [488+r9], rsi
+ mov rcx, r10
+ mov rsi, r11
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rsi, rdx
+ adc r8, 0
+ mov qword ptr [496+r9], rcx
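+
+; Epilogue: the product's sign is sign(A) XOR sign(B) (r12 and rbx still
+; hold the operand pointers here; offset 0 is the sign word), and the limb
+; count at offset 8 is provisionally the full 64 limbs until leading zeros
+; are clamped off below.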
+ mov ecx, dword ptr [r12]
+ xor ecx, dword ptr [rbx]
+ test rsi, rsi
+ mov qword ptr [504+r9], rsi
+ mov dword ptr [8+r13], 64
+ jne L149
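+
+; L157: walk down from the top limb, shrinking the limb count past leading
+; zero limbs; L149 then forces the sign to ZPOS when the result is zero.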
+ ALIGN 16
+L157:
+ mov edx, dword ptr [8+r13]
+ lea ebx, dword ptr [-1+rdx]
+ test ebx, ebx
+ mov dword ptr [8+r13], ebx
+ je L149
+ lea r12d, dword ptr [-2+rdx]
+ cmp qword ptr [r9+r12*8], 0
+ je L157
+L149:
+ mov r9d, dword ptr [8+r13]
+ xor edx, edx
+ test r9d, r9d
+ cmovne edx, ecx
+ mov dword ptr [r13], edx
+ add rsp, 520+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop rbp
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_32 ENDP
+
+
+; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
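+;
+; Comba squaring of a 4-limb mp_int. Every off-diagonal product a[i]*a[j]
+; (i != j) belongs to column i+j twice, so it is added into the column
+; accumulator two times; each diagonal square a[i]*a[i] is added once.
+; Columns are carried in the same low/middle/carry register chain as the
+; multiply routines above. A rough sketch of one column in C-like
+; pseudocode (illustrative only, not part of this file):
+;
+;   /* acc is a 3-limb accumulator; a[] is the input, w[] the result */
+;   for (i = 0; i <= k / 2; i++) {
+;       p = (mp_word)a[i] * a[k - i]; /* mul leaves the product in rdx:rax */
+;       acc += p;                     /* add / adc / adc 0                 */
+;       if (i != k - i)
+;           acc += p;                 /* off-diagonal terms count twice    */
+;   }
+;   w[k] = (mp_digit)acc;             /* store low word, shift acc down    */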
+
+ ALIGN 16
+s_mp_sqr_comba_4 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ push rbx
+ sub rsp, 80
+ mov r11, rsi
+ xor esi, esi
+ mov r10, rsi
+ mov rbp, rsi
+ mov r8, rsi
+ mov rbx, rsi
+ mov rcx, qword ptr [16+rdi]
+ mov rdi, rsi
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc rdi, 0
+ mov qword ptr [-72+80+rsp], r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rdi, rdx
+ adc rbp, 0
+ add rbx, rax
+ adc rdi, rdx
+ adc rbp, 0
+ mov qword ptr [-64+80+rsp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rdi, rax
+ adc rbp, rdx
+ adc r8, 0
+ add rdi, rax
+ adc rbp, rdx
+ adc r8, 0
+ mov rbx, rbp
+ mov rbp, r8
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rdi, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov qword ptr [-56+80+rsp], rdi
+ mov r9, rbp
+ mov r8, rbx
+ mov rdi, rsi
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc r9, rdx
+ adc rdi, 0
+ add r8, rax
+ adc r9, rdx
+ adc rdi, 0
+ mov rbx, r9
+ mov rbp, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r8, rax
+ adc rbx, rdx
+ adc rbp, 0
+ add r8, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov qword ptr [-48+80+rsp], r8
+ mov r9, rbp
+ mov rdi, rbx
+ mov r8, rsi
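+; the output's limb count (offset 8) is provisionally set to 8 and its
+; sign (offset 0) to ZPOS; both are fixed up after the copy-out below.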
+ mov dword ptr [8+r11], 8
+ mov dword ptr [r11], 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add rdi, rax
+ adc r9, rdx
+ adc r8, 0
+ add rdi, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbx, r9
+ mov rbp, r8
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add rdi, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov rax, rbp
+ mov qword ptr [-40+80+rsp], rdi
+ mov rbp, rbx
+ mov rdi, rax
+ mov rbx, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add rbp, rax
+ adc rdi, rdx
+ adc rbx, 0
+ add rbp, rax
+ adc rdi, rdx
+ adc rbx, 0
+ mov qword ptr [-32+80+rsp], rbp
+ mov r9, rbx
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add rdi, rax
+ adc r9, rdx
+ adc rsi, 0
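+; finish the last two columns into the scratch area, then copy all eight
+; results out to the digit array at offset 16 of the output mp_int.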
+ mov rdx, qword ptr [16+r11]
+ mov qword ptr [-24+80+rsp], rdi
+ mov qword ptr [-16+80+rsp], r9
+ mov qword ptr [rdx], r10
+ mov r8, qword ptr [-64+80+rsp]
+ mov qword ptr [8+rdx], r8
+ mov rbp, qword ptr [-56+80+rsp]
+ mov qword ptr [16+rdx], rbp
+ mov rdi, qword ptr [-48+80+rsp]
+ mov qword ptr [24+rdx], rdi
+ mov rsi, qword ptr [-40+80+rsp]
+ mov qword ptr [32+rdx], rsi
+ mov rbx, qword ptr [-32+80+rsp]
+ mov qword ptr [40+rdx], rbx
+ mov rcx, qword ptr [-24+80+rsp]
+ mov qword ptr [48+rdx], rcx
+ mov rax, qword ptr [-16+80+rsp]
+ mov qword ptr [56+rdx], rax
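+; clamp: scan down from the top limb, trimming zeros from the limb count;
+; if the whole value is zero, L168 forces the stored sign to ZPOS (eax = 0).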
+ mov edx, dword ptr [8+r11]
+ test edx, edx
+ je L168
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r11]
+ mov r10d, ecx
+ cmp qword ptr [rsi+r10*8], 0
+ jne L166
+ mov edx, ecx
+ ALIGN 16
+L167:
+ test edx, edx
+ mov ecx, edx
+ je L171
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L167
+ mov dword ptr [8+r11], ecx
+ mov edx, ecx
+L166:
+ test edx, edx
+ je L168
+ mov eax, dword ptr [r11]
+ jmp L169
+
+L171:
+ mov dword ptr [8+r11], edx
+L168:
+ xor eax, eax
+L169:
+ add rsp, 80
+ pop rbx
+ pop rbp
+ mov dword ptr [r11], eax
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_4 ENDP
+
+
+; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
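+;
+; 8-limb Comba square, same column scheme as s_mp_sqr_comba_4. The
+; doubling of off-diagonal terms is done by adding the product (or a
+; partial column sum) into the accumulator twice, rather than by an
+; explicit shift, so the carries stay inside the adc chain.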
+
+ ALIGN 16
+s_mp_sqr_comba_8 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
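+; (only A and B are real parameters here; the rdx/rcx copies above look
+; like leftovers from a wider template and are overwritten before use)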
+
+ push r14
+ xor r9d, r9d
+ mov r14, r9
+ mov r10, r9
+ push r13
+ mov r13, r9
+ push r12
+ mov r12, r9
+ push rbp
+ mov rbp, rsi
+ mov rsi, r9
+ push rbx
+ mov rbx, r9
+ sub rsp, 8+128
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r14, rax
+ adc rbx, rdx
+ adc r12, 0
+ mov qword ptr [-120+128+rsp], r14
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc r12, rdx
+ adc r10, 0
+ add rbx, rax
+ adc r12, rdx
+ adc r10, 0
+ mov qword ptr [-112+128+rsp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add r12, rax
+ adc r10, rdx
+ adc r13, 0
+ add r12, rax
+ adc r10, rdx
+ adc r13, 0
+ mov rbx, r10
+ mov r10, r13
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add r12, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-104+128+rsp], r12
+ mov rdi, r10
+ mov r11, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r11, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, rdi
+ mov r10, rsi
+ mov rdi, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r11, rax
+ adc rbx, rdx
+ adc r10, 0
+ add r11, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov rsi, r9
+ mov qword ptr [-96+128+rsp], r11
+ mov r8, r10
+ mov r12, rbx
+ mov r11, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ mov rbx, r8
+ mov r10, r13
+ mov r8, r9
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r12, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-88+128+rsp], r12
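+; For the wide middle columns the off-diagonal products are first summed
+; once into the r8:rdi:rsi triple, and that partial sum is then folded
+; into the running column accumulator twice, doubling every term at once.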
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-80+128+rsp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-72+128+rsp], r10
+ mov r10, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rax, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rax, rsi
+ mov qword ptr [-64+128+rsp], rbx
+ mov r11, rax
+ mov rbx, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rsi, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov r13, r12
+ mov r11, rsi
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-56+128+rsp], r10
+ mov r10, r9
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor r13, r13
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc r13, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc r13, 0
+ mov r12, rdi
+ mov rax, r13
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ mov qword ptr [-48+128+rsp], rbx
+ mov r12, r11
+ mov rsi, r10
+ mov rbx, r9
+ mov r11, r9
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r12, rax
+ adc rsi, rdx
+ adc rbx, 0
+ add r12, rax
+ adc rsi, rdx
+ adc rbx, 0
+ mov r13, rbx
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ mov r10, rsi
+ mov rbx, r13
+ mov r13, r9
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r12, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-40+128+rsp], r12
+ mov r8, rbx
+ mov rdi, r10
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add rdi, rax
+ adc r8, rdx
+ adc r11, 0
+ add rdi, rax
+ adc r8, rdx
+ adc r11, 0
+ mov r10, r8
+ mov rbx, r11
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add rdi, rax
+ adc r10, rdx
+ adc rbx, 0
+ add rdi, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-32+128+rsp], rdi
+ mov rsi, rbx
+ mov r12, r10
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ mov r10, rsi
+ mov rbx, r13
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r12, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-24+128+rsp], r12
+ mov rdi, r10
+ mov rsi, rbx
+ mov r10, r9
+ mov dword ptr [8+rbp], 16
+ mov dword ptr [rbp], 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov qword ptr [-16+128+rsp], rdi
+ mov r8, r10
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r9, 0
+ mov rax, qword ptr [16+rbp]
+ mov qword ptr [-8+128+rsp], rsi
+ mov qword ptr [128+rsp], r8
+ mov qword ptr [rax], r14
+ mov rbx, qword ptr [-112+128+rsp]
+ mov qword ptr [8+rax], rbx
+ mov rcx, qword ptr [-104+128+rsp]
+ mov qword ptr [16+rax], rcx
+ mov rdx, qword ptr [-96+128+rsp]
+ mov qword ptr [24+rax], rdx
+ mov r14, qword ptr [-88+128+rsp]
+ mov qword ptr [32+rax], r14
+ mov r13, qword ptr [-80+128+rsp]
+ mov qword ptr [40+rax], r13
+ mov r12, qword ptr [-72+128+rsp]
+ mov qword ptr [48+rax], r12
+ mov r11, qword ptr [-64+128+rsp]
+ mov qword ptr [56+rax], r11
+ mov r10, qword ptr [-56+128+rsp]
+ mov qword ptr [64+rax], r10
+ mov r9, qword ptr [-48+128+rsp]
+ mov qword ptr [72+rax], r9
+ mov r8, qword ptr [-40+128+rsp]
+ mov qword ptr [80+rax], r8
+ mov rdi, qword ptr [-32+128+rsp]
+ mov qword ptr [88+rax], rdi
+ mov rsi, qword ptr [-24+128+rsp]
+ mov qword ptr [96+rax], rsi
+ mov rbx, qword ptr [-16+128+rsp]
+ mov qword ptr [104+rax], rbx
+ mov rcx, qword ptr [-8+128+rsp]
+ mov qword ptr [112+rax], rcx
+ mov rdx, qword ptr [128+rsp]
+ mov qword ptr [120+rax], rdx
+ mov edx, dword ptr [8+rbp]
+ test edx, edx
+ je L192
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+rbp]
+ mov r14d, ecx
+ cmp qword ptr [rsi+r14*8], 0
+ jne L190
+ mov edx, ecx
+ ALIGN 16
+L191:
+ test edx, edx
+ mov ecx, edx
+ je L195
+ dec edx
+ mov r9d, edx
+ cmp qword ptr [rsi+r9*8], 0
+ je L191
+ mov dword ptr [8+rbp], ecx
+ mov edx, ecx
+L190:
+ test edx, edx
+ je L192
+ mov eax, dword ptr [rbp]
+ jmp L193
+
+L195:
+ mov dword ptr [8+rbp], edx
+L192:
+ xor eax, eax
+L193:
+ mov dword ptr [rbp], eax
+ add rsp, 8+128
+ pop rbx
+ pop rbp
+ pop r12
+ pop r13
+ pop r14
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_8 ENDP
+
+
+; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
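+;
+; 16-limb Comba square. This is a "FRAME" function: rbp anchors a
+; 256-byte column scratch area (plus 32 bytes of Win64 "home" space),
+; and the finished limbs are copied to the output digit array with a
+; single memcpy at the end instead of being stored limb by limb.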
+
+ ALIGN 16
+s_mp_sqr_comba_16 PROC ; A "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ xor r9d, r9d
+ mov r8, r9
+ mov r11, r9
+ mov rbp, rsp
+ push r14
+ mov r14, rsi
+ mov rsi, r9
+ push r13
+ mov r13, r9
+ push r12
+ mov r12, r9
+ push rbx
+ mov rbx, r9
+ sub rsp, 256+32 ; +32 for "home" storage
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r8, rax
+ adc rbx, rdx
+ adc rsi, 0
+ mov qword ptr [-288+rbp], r8
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ mov qword ptr [-280+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rbx, r12
+ mov r10, r13
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rsi, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-272+rbp], rsi
+ mov rdi, r10
+ mov rsi, r9
+ mov r10, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r10, rax
+ adc rdi, rdx
+ adc r11, 0
+ add r10, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov r12, rdi
+ mov rbx, r11
+ mov rdi, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ mov r11, r9
+ mov qword ptr [-264+rbp], r10
+ mov r8, rbx
+ mov r13, r12
+ mov r12, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rbx, r8
+ mov r10, r12
+ mov r8, r9
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-256+rbp], r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-248+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-240+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov r11, rdx
+ mov qword ptr [-232+rbp], rbx
+ mov rbx, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [64+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r10, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov rdi, r13
+ mov qword ptr [-224+rbp], r10
+ mov rsi, r12
+ mov r10, rbx
+ mov r12, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [72+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, r8
+ adc r10, rdi
+ adc r12, rsi
+ add r11, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-216+rbp], r11
+ mov rbx, r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [80+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc rbx, r13
+ adc rax, r12
+ add r10, r8
+ adc rbx, r13
+ adc rax, r12
+ mov rdx, rax
+ mov r11, rbx
+ mov rdi, r13
+ mov rbx, rdx
+ mov rsi, r12
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r10, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov qword ptr [-208+rbp], r10
+ mov r10, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [88+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add r11, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add r11, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov r13, rdx
+ mov qword ptr [-200+rbp], r11
+ mov r12, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [96+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rdx, rdi
+ mov r11, rsi
+ add r10, r8
+ adc r12, rdx
+ adc rax, r11
+ add r10, r8
+ adc r12, rdx
+ adc rax, r11
+ mov rbx, rdx
+ mov r13, rax
+ mov rsi, r11
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, rbx
+ mov qword ptr [-192+rbp], r10
+ mov r10, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [104+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ mov qword ptr [-184+rbp], r12
+ mov r12, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [112+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rbx, rdi
+ mov rdx, rsi
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ mov r11, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-176+rbp], r10
+ mov r10, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ mov qword ptr [-168+rbp], r12
+ mov r12, r13
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rbx, rdi
+ mov rdx, rsi
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ mov r11, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rax, qword ptr [64+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-160+rbp], r10
+ mov r11, r9
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r10, r13
+ mov rbx, r9
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r11, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-152+rbp], r12
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r9
+ mov rax, qword ptr [72+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-144+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-136+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [80+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-128+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov qword ptr [-120+rbp], rbx
+ mov r11, rdx
+ mov rbx, r9
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r9
+ mov rax, qword ptr [88+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-112+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-104+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [96+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-96+rbp], r10
+ mov r10, r9
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r12, rdi
+ mov rax, rsi
+ mov rsi, r9
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ mov r12, r9
+ mov qword ptr [-88+rbp], rbx
+ mov r13, r11
+ mov r11, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [120+rcx]
+ add r13, rax
+ adc r11, rdx
+ adc r12, 0
+ add r13, rax
+ adc r11, rdx
+ adc r12, 0
+ mov rdi, r12
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [112+rcx]
+ add r13, rax
+ adc r11, rdx
+ adc rdi, 0
+ add r13, rax
+ adc r11, rdx
+ adc rdi, 0
+ mov rbx, r11
+ mov r10, rdi
+ mov r11, r9
+ mov rax, qword ptr [104+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-80+rbp], r13
+ mov r8, r10
+ mov r10, rbx
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [120+rcx]
+ add r10, rax
+ adc r8, rdx
+ adc rsi, 0
+ add r10, rax
+ adc r8, rdx
+ adc rsi, 0
+ mov r12, r8
+ mov rbx, rsi
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [112+rcx]
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ mov qword ptr [-72+rbp], r10
+ mov r13, rbx
+ mov rbx, r12
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [120+rcx]
+ add rbx, rax
+ adc r13, rdx
+ adc r11, 0
+ add rbx, rax
+ adc r13, rdx
+ adc r11, 0
+ mov r12, r11
+ mov r10, r13
+ mov rax, qword ptr [112+rcx]
+ mul rax
+ add rbx, rax
+ adc r10, rdx
+ adc r12, 0
+ mov qword ptr [-64+rbp], rbx
+ mov rdi, r10
+ mov rbx, r9
+ mov rsi, r12
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [120+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc rbx, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc rbx, 0
+ mov qword ptr [-56+rbp], rdi
+ mov r8, rbx
+ mov rax, qword ptr [120+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r9, 0
+ mov qword ptr [-48+rbp], rsi
+ mov qword ptr [-40+rbp], r8
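+; Fill in the result header before copying the digits out. Assuming the
+; usual NSS mp_int layout (sign at offset 0, used at offset 8, dp at
+; offset 16), the two stores below set B->used = 32 and B->sign = ZPOS.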
+ mov dword ptr [8+r14], 32
+ mov dword ptr [r14], 0
+; mov rdi, qword ptr [16+r14]
+; lea rsi, qword ptr [-288+rbp]
+; mov edx, 256
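+; Win64 argument registers for memcpy(dst, src, n): rcx = B->dp,
+; rdx = &tmp, r8d = 256 bytes (32 qwords); the commented lines above
+; preserve the original System V argument setup for the same call.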
+ mov rcx, qword ptr [16+r14]
+ lea rdx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
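+; Inline clamp: walk down from the most significant digit, shrinking
+; B->used past any leading zero qwords, and if every digit turns out to
+; be zero, reset the sign to ZPOS as well.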
+ mov edx, dword ptr [8+r14]
+ test edx, edx
+ je L232
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r14]
+ mov r9d, ecx
+ cmp qword ptr [rsi+r9*8], 0
+ jne L230
+ mov edx, ecx
+ ALIGN 16
+L231:
+ test edx, edx
+ mov ecx, edx
+ je L235
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L231
+ mov dword ptr [8+r14], ecx
+ mov edx, ecx
+L230:
+ test edx, edx
+ je L232
+ mov eax, dword ptr [r14]
+ jmp L233
+
+L235:
+ mov dword ptr [8+r14], edx
+L232:
+ xor eax, eax
+L233:
+ mov dword ptr [r14], eax
+ add rsp, 256+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop rbp
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_16 ENDP
+
+
+; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
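+; Comba (column-wise) squaring: the 32-qword operand is squared into 64
+; result qwords built up in stack temporaries. For each result column k
+; a three-register, 192-bit accumulator collects
+;
+;     t[k] = SUM(i+j == k, i < j) 2*a[i]*a[j]  +  (a[k/2]^2 if k even)
+;
+; and the two high words carry into column k+1. In rough pseudocode
+; (names illustrative, not from the source):
+;
+;     for k = 0 .. 62:
+;         for all i < j with i + j == k:  acc += a[i]*a[j];  acc += a[i]*a[j]
+;         if k is even:                   acc += a[k/2]*a[k/2]
+;         t[k] = low qword of acc;  acc >>= 64
+;     t[63] = acc
+;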
+ ALIGN 16
+s_mp_sqr_comba_32 PROC ; A "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
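+; Win64 passes A in rcx and B in rdx; shuffle them into rdi/rsi to match
+; the System V register assignments this body was originally written for.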
+
+ push rbp
+ xor r10d, r10d
+ mov r8, r10
+ mov r11, r10
+ mov rbp, rsp
+ push r14
+ mov r14, rsi
+ mov rsi, r10
+ push r13
+ mov r13, r10
+ push r12
+ mov r12, r10
+ push rbx
+ mov rbx, r10
+ sub rsp, 512+32 ; +32 for "home" storage
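+; Locals: 512 bytes of temporaries for the 64 result qwords, written
+; from [-544+rbp] upward, plus the 32-byte register "home" area the
+; Win64 ABI requires for the memcpy call at the end. r10 is kept at
+; zero throughout and copied around to reseed carry accumulators.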
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r8, rax
+ adc rbx, rdx
+ adc rsi, 0
+ mov qword ptr [-544+rbp], r8
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ mov qword ptr [-536+rbp], rbx
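+; Pattern used for every column: each off-diagonal product a[i]*a[j] is
+; added into the three-register accumulator twice (hence the repeated
+; add/adc/adc just above), supplying the factor of two; the diagonal
+; squares ("mul rax" on a freshly loaded digit) are added only once.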
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rbx, r12
+ mov r9, r13
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rsi, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov qword ptr [-528+rbp], rsi
+ mov rdi, r9
+ mov rsi, r10
+ mov r9, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r9, rax
+ adc rdi, rdx
+ adc r11, 0
+ add r9, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov r12, rdi
+ mov r13, r11
+ mov rdi, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r11, r10
+ mov qword ptr [-520+rbp], r9
+ mov r8, r13
+ mov r13, r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rbx, r8
+ mov r9, r12
+ mov r8, r10
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov qword ptr [-512+rbp], r13
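+; Columns 5 through 62 below repeat this shape, widening and then
+; narrowing the set of cross products; column k lands at [(-544+8*k)+rbp].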
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r11, rsi
+ mov qword ptr [-504+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-496+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov r11, rdx
+ mov qword ptr [-488+rbp], rbx
+ mov rbx, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [64+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r9, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov rdi, r13
+ mov qword ptr [-480+rbp], r9
+ mov rsi, r12
+ mov r9, rbx
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [72+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, r8
+ adc r9, rdi
+ adc r12, rsi
+ add r11, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-472+rbp], r11
+ mov rbx, r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [80+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc rbx, r13
+ adc rax, r12
+ add r9, r8
+ adc rbx, r13
+ adc rax, r12
+ mov rdx, rax
+ mov r11, rbx
+ mov rdi, r13
+ mov rbx, rdx
+ mov rsi, r12
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r9, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov qword ptr [-464+rbp], r9
+ mov r9, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [88+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add r11, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add r11, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov r13, rdx
+ mov qword ptr [-456+rbp], r11
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [96+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, rdi
+ mov r11, rsi
+ add r9, r8
+ adc r12, rax
+ adc r13, r11
+ add r9, r8
+ adc r12, rax
+ adc r13, r11
+ mov rbx, rax
+ mov rsi, r11
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, rbx
+ mov qword ptr [-448+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [104+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ mov qword ptr [-440+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [112+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r13
+ mov rbx, rdi
+ mov r13, rsi
+ add r9, r8
+ adc rdx, rbx
+ adc r12, r13
+ add r9, r8
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rsi, r11
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-432+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r8
+ mov rdx, rdi
+ mov rbx, rsi
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ mov qword ptr [-424+rbp], r12
+ mov r8, rdx
+ mov rsi, rax
+ mov rdi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [128+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [80+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [72+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [64+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-416+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [136+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-408+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [144+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [80+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [72+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-400+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [152+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-392+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [160+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [80+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-384+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [168+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [152+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-376+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [176+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [88+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-368+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [184+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [152+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov rdi, rdx
+ mov qword ptr [-360+rbp], r12
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [192+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rbx, r8
+ mov rax, rdi
+ add r9, rsi
+ adc r12, rbx
+ adc r13, rax
+ add r9, rsi
+ adc r12, rbx
+ adc r13, rax
+ mov r11, rax
+ mov r8, rbx
+ mov rax, qword ptr [96+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, r11
+ mov qword ptr [-352+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [200+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-344+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [208+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rdx, r13
+ mov rbx, r8
+ mov r13, rdi
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov r8, rbx
+ mov rdi, r11
+ mov rax, qword ptr [104+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-336+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [216+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-328+rbp], r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [224+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r13
+ mov rdx, r10
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ mov rdi, rdx
+ mov r11, r12
+ mov r8, rbx
+ mov r12, rax
+ mov r13, rdi
+ mov rdi, r11
+ mov rax, qword ptr [112+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-320+rbp], r9
+ mov rbx, r13
+ mov r9, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [232+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc rbx, r8
+ adc r9, rdi
+ add r12, rsi
+ adc rbx, r8
+ adc r9, rdi
+ mov qword ptr [-312+rbp], r12
+ mov r13, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [240+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r10
+ mov r11, r8
+ mov rdx, rdi
+ add rbx, rsi
+ adc r13, r11
+ adc rax, rdx
+ add rbx, rsi
+ adc r13, r11
+ adc rax, rdx
+ mov r9, rdx
+ mov rdx, rax
+ mov r12, r13
+ mov r8, r11
+ mov r13, rdx
+ mov rdi, r9
+ mov rax, qword ptr [120+rcx]
+ mul rax
+ add rbx, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-304+rbp], rbx
+ mov rbx, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc rbx, r8
+ adc r13, rdi
+ add r12, rsi
+ adc rbx, r8
+ adc r13, rdi
+ mov qword ptr [-296+rbp], r12
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov r11, r8
+ mov rax, rdi
+ add rbx, rsi
+ adc r12, r11
+ adc r13, rax
+ add rbx, rsi
+ adc r12, r11
+ adc r13, rax
+ mov r9, rax
+ mov r8, r11
+ mov rax, qword ptr [128+rcx]
+ mul rax
+ add rbx, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, r9
+ mov qword ptr [-288+rbp], rbx
+ mov r9, r13
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov r13, r10
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-280+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rdx, r13
+ mov rbx, r8
+ mov r13, rdi
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov r8, rbx
+ mov rdi, r11
+ mov rax, qword ptr [136+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-272+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-264+rbp], r12
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r13
+ mov rdx, r10
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ mov rdi, rdx
+ mov r11, r12
+ mov r8, rbx
+ mov r12, rax
+ mov r13, rdi
+ mov rdi, r11
+ mov rax, qword ptr [144+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r11, r10
+ mov qword ptr [-256+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r11, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r11, rdi
+ mov qword ptr [-248+rbp], r12
+ mov r13, r11
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r10
+ mov rdx, rsi
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rdx
+ adc r13, rbx
+ adc rax, r12
+ add r9, rdx
+ adc r13, rbx
+ adc rax, r12
+ mov r11, r12
+ mov r8, rdx
+ mov rdx, rax
+ mov r12, r13
+ mov rdi, rbx
+ mov r13, rdx
+ mov rsi, r11
+ mov rax, qword ptr [152+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-240+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r8
+ mov rdx, rdi
+ mov rbx, rsi
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ mov qword ptr [-232+rbp], r12
+ mov r8, rdx
+ mov rsi, rax
+ mov rdi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [160+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-224+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-216+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [168+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-208+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-200+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [176+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-192+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-184+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [184+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-176+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ mov qword ptr [-168+rbp], r12
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, rdi
+ mov rax, rsi
+ add r9, r8
+ adc r12, rbx
+ adc r13, rax
+ add r9, r8
+ adc r12, rbx
+ adc r13, rax
+ mov r11, rax
+ mov rdi, rbx
+ mov rbx, r10
+ mov rax, qword ptr [192+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-160+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc rbx, rsi
+ add r12, r8
+ adc r9, rdi
+ adc rbx, rsi
+ mov qword ptr [-152+rbp], r12
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc rbx, r13
+ adc rdx, r12
+ add r9, r8
+ adc rbx, r13
+ adc rdx, r12
+ mov rax, rdx
+ mov rdi, r13
+ mov rsi, r12
+ mov r11, rax
+ mov r12, r10
+ mov rax, qword ptr [200+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-144+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-136+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [208+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-128+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov qword ptr [-120+rbp], rbx
+ mov r11, rdx
+ mov rbx, r10
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r10
+ mov rax, qword ptr [216+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-112+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-104+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov r12, r10
+ mov rax, qword ptr [224+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-96+rbp], r9
+ mov r9, r10
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov rax, rsi
+ add rbx, r8
+ adc r11, r13
+ adc r9, rax
+ add rbx, r8
+ adc r11, r13
+ adc r9, rax
+ mov qword ptr [-88+rbp], rbx
+ mov rsi, r11
+ mov r8, r9
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [248+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc r12, 0
+ add rsi, rax
+ adc r8, rdx
+ adc r12, 0
+ mov r11, r12
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc r11, 0
+ add rsi, rax
+ adc r8, rdx
+ adc r11, 0
+ mov r13, r8
+ mov rbx, r11
+ mov rax, qword ptr [232+rcx]
+ mul rax
+ add rsi, rax
+ adc r13, rdx
+ adc rbx, 0
+ mov qword ptr [-80+rbp], rsi
+ mov r12, rbx
+ mov rdi, r13
+ mov r13, r10
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [248+rcx]
+ add rdi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rdi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r9, r12
+ mov r12, r13
+ mov rax, qword ptr [232+rcx]
+ mul qword ptr [240+rcx]
+ add rdi, rax
+ adc r9, rdx
+ adc r12, 0
+ add rdi, rax
+ adc r9, rdx
+ adc r12, 0
+ mov qword ptr [-72+rbp], rdi
+ mov r11, r9
+ mov rbx, r12
+ mov r9, r10
+ mov rax, qword ptr [232+rcx]
+ mul qword ptr [248+rcx]
+ add r11, rax
+ adc rbx, rdx
+ adc r9, 0
+ add r11, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov r13, rbx
+ mov rbx, r9
+ mov r9, r10
+ mov rax, qword ptr [240+rcx]
+ mul rax
+ add r11, rax
+ adc r13, rdx
+ adc rbx, 0
+ mov qword ptr [-64+rbp], r11
+ mov rdi, r13
+ mov rsi, rbx
+ mov rax, qword ptr [240+rcx]
+ mul qword ptr [248+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc r9, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc r9, 0
+ mov qword ptr [-56+rbp], rdi
+ mov r8, r9
+ mov rax, qword ptr [248+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r10, 0
+ mov qword ptr [-48+rbp], rsi
+ mov qword ptr [-40+rbp], r8
+ mov dword ptr [8+r14], 64
+ mov dword ptr [r14], 0
+; mov rdi, qword ptr [16+r14]
+; lea rsi, qword ptr [-544+rbp]
+; mov edx, 512
+ mov rcx, qword ptr [16+r14]
+ lea rdx, qword ptr [-544+rbp]
+ mov r8d, 512
+ call memcpy
+ mov edx, dword ptr [8+r14]
+ test edx, edx
+ je L304
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r14]
+ mov r10d, ecx
+ cmp qword ptr [rsi+r10*8], 0
+ jne L302
+ mov edx, ecx
+ ALIGN 16
+L303:
+ test edx, edx
+ mov ecx, edx
+ je L307
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L303
+ mov dword ptr [8+r14], ecx
+ mov edx, ecx
+L302:
+ test edx, edx
+ je L304
+ mov eax, dword ptr [r14]
+ jmp L305
+
+L307:
+ mov dword ptr [8+r14], edx
+L304:
+ xor eax, eax
+L305:
+ mov dword ptr [r14], eax
+ add rsp, 512+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop rbp
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_32 ENDP
+
+END
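
The long add/adc runs above are the comba squaring kernel: each cross product a[i]*a[j] with i < j is formed once by `mul` (the 128-bit result lands in rdx:rax) and folded twice into a three-register column accumulator via back-to-back add/adc/adc-0 triples, while each diagonal term a[i]*a[i] (the bare `mul rax`, squaring the register against itself) is folded in once. The commented-out rdi/rsi/edx lines before the memcpy call are the System V argument registers from the Unix original; this MASM port passes rcx/rdx/r8d instead and reserves the 32 bytes of "home" storage the Microsoft x64 convention requires, which is why the epilogue releases 512+32 bytes. A minimal C sketch of one squaring column, assuming 64-bit limbs and a compiler with unsigned __int128 (the function and variable names are illustrative, not taken from the source):

    #include <stdint.h>

    typedef uint64_t mp_digit;

    /* Fold a 128-bit product into the three-limb column (c0, c1, c2):
       the C equivalent of one add/adc/adc-0 triple in the assembly. */
    static void acc(mp_digit *c0, mp_digit *c1, mp_digit *c2,
                    unsigned __int128 p)
    {
        unsigned __int128 s = (unsigned __int128)*c0 + (mp_digit)p;
        *c0 = (mp_digit)s;
        s = (unsigned __int128)*c1 + (mp_digit)(p >> 64) + (mp_digit)(s >> 64);
        *c1 = (mp_digit)s;
        *c2 += (mp_digit)(s >> 64);
    }

    /* Column k of the square of an n-limb number a. */
    static void sqr_column(const mp_digit *a, int n, int k,
                           mp_digit *c0, mp_digit *c1, mp_digit *c2)
    {
        int i, j;
        for (i = (k < n) ? 0 : k - n + 1, j = k - i; i < j; i++, j--) {
            unsigned __int128 p = (unsigned __int128)a[i] * a[j];
            acc(c0, c1, c2, p); /* a[i]*a[j] occurs twice in the square, */
            acc(c0, c1, c2, p); /* so it is added twice, as above        */
        }
        if (i == j) /* diagonal term a[i]*a[i] occurs once */
            acc(c0, c1, c2, (unsigned __int128)a[i] * a[i]);
    }

Adding the same triple twice, exactly as the assembly does, doubles the off-diagonal contribution without a separate shift and keeps every carry inside the three-limb chain.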
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
new file mode 100644
index 0000000000..a5181df332
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
@@ -0,0 +1,16097 @@
+//* TomsFastMath, a fast ISO C bignum library.
+/ *
+/ * This project is meant to fill in where LibTomMath
+/ * falls short. That is speed ;-)
+/ *
+/ * This project is public domain and free for all purposes.
+/ *
+/ * Tom St Denis, tomstdenis@iahu.ca
+/ */
+
+//*
+/ * The source file from which this assembly was derived
+/ * comes from TFM v0.03, which has the above license.
+/ * This source was compiled with an unnamed compiler at
+/ * the highest optimization level. Afterwards, the
+/ * trailing .section was removed because it causes errors
+/ * in the Studio 10 compiler on AMD 64.
+/ */
+
+ .file "mp_comba.c"
+ .text
+ .align 16
+.globl s_mp_mul_comba_4
+ .type s_mp_mul_comba_4, @function
+s_mp_mul_comba_4:
+.LFB2:
+ pushq %r12
+.LCFI0:
+ pushq %rbp
+.LCFI1:
+ pushq %rbx
+.LCFI2:
+ movq 16(%rdi), %r9
+ movq %rdx, %rbx
+ movq 16(%rsi), %rdx
+ movq (%r9), %rax
+ movq %rax, -64(%rsp)
+ movq 8(%r9), %r8
+ movq %r8, -56(%rsp)
+ movq 16(%r9), %rbp
+ movq %rbp, -48(%rsp)
+ movq 24(%r9), %r12
+ movq %r12, -40(%rsp)
+ movq (%rdx), %rcx
+ movq %rcx, -32(%rsp)
+ movq 8(%rdx), %r10
+ movq %r10, -24(%rsp)
+ movq 16(%rdx), %r11
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %r11, -16(%rsp)
+ movq 16(%rbx), %r11
+ movq 24(%rdx), %rax
+ movq %rax, -8(%rsp)
+/APP
+ movq -64(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -64(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -56(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -56(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -48(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -64(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -56(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -48(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq -40(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq -40(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 48(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 56(%r11)
+ movl $8, 8(%rbx)
+ jne .L9
+ .align 16
+.L18:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L9
+ leal -2(%rdx), %r10d
+ cmpq $0, (%r11,%r10,8)
+ je .L18
+.L9:
+ movl 8(%rbx), %edx
+ xorl %r11d, %r11d
+ testl %edx, %edx
+ cmovne %esi, %r11d
+ movl %r11d, (%rbx)
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE2:
+ .size s_mp_mul_comba_4, .-s_mp_mul_comba_4
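
In this GCC-built Unix version the /APP ... /NO_APP brackets mark the inline-assembly bodies of the multiply-and-accumulate macro from the original mp_comba.c; the movq traffic between them is just the compiler rotating the three column-accumulator registers from one column to the next. A C-level sketch of the 4x4 routine's structure, again assuming 64-bit limbs with unsigned __int128 (MULADD follows the TomsFastMath naming; treat the rest as illustrative):

    #include <stdint.h>

    typedef uint64_t mp_digit;

    /* mulq leaves x*y in rdx:rax; addq/adcq/adcq $0 folds it into the
       running column: c0 low limb, c1 high limb, c2 carry limb. */
    #define MULADD(x, y)                                                 \
        do {                                                             \
            unsigned __int128 _p = (unsigned __int128)(x) * (y);         \
            unsigned __int128 _s = (unsigned __int128)c0 + (mp_digit)_p; \
            c0 = (mp_digit)_s;                                           \
            _s = (unsigned __int128)c1 + (mp_digit)(_p >> 64)            \
                 + (mp_digit)(_s >> 64);                                 \
            c1 = (mp_digit)_s;                                           \
            c2 += (mp_digit)(_s >> 64);                                  \
        } while (0)

    /* 4x4 comba product: column k of the 8-limb result gathers every
       a[i]*b[j] with i + j == k, then the accumulator shifts one limb. */
    static void mul_comba_4(const mp_digit a[4], const mp_digit b[4],
                            mp_digit out[8])
    {
        mp_digit c0 = 0, c1 = 0, c2 = 0;
        for (int k = 0; k < 7; k++) {
            for (int i = 0; i < 4; i++) {
                if (k - i >= 0 && k - i < 4)
                    MULADD(a[i], b[k - i]);
            }
            out[k] = c0; /* this column is finished; store it once */
            c0 = c1;
            c1 = c2;
            c2 = 0;
        }
        out[7] = c0; /* top limb */
    }

Each result limb is stored to its (%r11) offset the moment its column closes and is never revisited, which is the point of the comba organization: carries stay in registers instead of propagating through the result array.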
+ .align 16
+.globl s_mp_mul_comba_8
+ .type s_mp_mul_comba_8, @function
+s_mp_mul_comba_8:
+.LFB3:
+ pushq %r12
+.LCFI3:
+ pushq %rbp
+.LCFI4:
+ pushq %rbx
+.LCFI5:
+ movq %rdx, %rbx
+ subq $8, %rsp
+.LCFI6:
+ movq 16(%rdi), %rdx
+ movq (%rdx), %r8
+ movq %r8, -120(%rsp)
+ movq 8(%rdx), %rbp
+ movq %rbp, -112(%rsp)
+ movq 16(%rdx), %r9
+ movq %r9, -104(%rsp)
+ movq 24(%rdx), %r12
+ movq %r12, -96(%rsp)
+ movq 32(%rdx), %rcx
+ movq %rcx, -88(%rsp)
+ movq 40(%rdx), %r10
+ movq %r10, -80(%rsp)
+ movq 48(%rdx), %r11
+ movq %r11, -72(%rsp)
+ movq 56(%rdx), %rax
+ movq 16(%rsi), %rdx
+ movq %rax, -64(%rsp)
+ movq (%rdx), %r8
+ movq %r8, -56(%rsp)
+ movq 8(%rdx), %rbp
+ movq %rbp, -48(%rsp)
+ movq 16(%rdx), %r9
+ movq %r9, -40(%rsp)
+ movq 24(%rdx), %r12
+ movq %r12, -32(%rsp)
+ movq 32(%rdx), %rcx
+ movq %rcx, -24(%rsp)
+ movq 40(%rdx), %r10
+ movq %r10, -16(%rsp)
+ movq 48(%rdx), %r11
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %r11, -8(%rsp)
+ movq 16(%rbx), %r11
+ movq 56(%rdx), %rax
+ movq %rax, (%rsp)
+/APP
+ movq -120(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -120(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -112(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -120(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -104(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -96(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -88(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -80(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -72(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 48(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 56(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 64(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -104(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 72(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -96(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 80(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -88(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 88(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -80(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 96(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -72(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq -64(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 104(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq -64(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 112(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 120(%r11)
+ movl $16, 8(%rbx)
+ jne .L35
+ .align 16
+.L43:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L35
+ leal -2(%rdx), %eax
+ cmpq $0, (%r11,%rax,8)
+ je .L43
+.L35:
+ movl 8(%rbx), %r11d
+ xorl %edx, %edx
+ testl %r11d, %r11d
+ cmovne %esi, %edx
+ movl %edx, (%rbx)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE3:
+ .size s_mp_mul_comba_8, .-s_mp_mul_comba_8
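
Every routine in this file ends with the same normalization epilogue (.L43/.L35 above): USED is first set to the full product width (16 limbs for the 8x8 case), the loop then walks down past leading zero limbs, and the sign word becomes the XOR of the two input sign words unless the value clamped all the way to zero, in which case it is forced non-negative. A sketch in C, assuming the mp_int layout this library uses (sign at offset 0, used at offset 8, digit pointer at offset 16, matching the 8(%rbx) and 16(%rdi) accesses above) and the SIGN/USED/DIGIT accessors from mpi.h; the function name is illustrative:

    #include "mpi.h" /* for mp_int and the SIGN/USED/DIGIT accessors */

    /* Post-multiply clamp, mirroring the tail loop of each comba
       routine: strip leading zero limbs, then fix the sign. */
    static void comba_clamp(const mp_int *a, const mp_int *b, mp_int *c,
                            int full_used)
    {
        USED(c) = full_used; /* e.g. 16 for s_mp_mul_comba_8 */
        while (USED(c) > 0 && DIGIT(c, USED(c) - 1) == 0)
            USED(c) -= 1; /* drop zero top limbs */
        /* sign(a) ^ sign(b), except that a zero result stays positive */
        SIGN(c) = (USED(c) != 0) ? (SIGN(a) ^ SIGN(b)) : 0;
    }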
+ .align 16
+.globl s_mp_mul_comba_16
+ .type s_mp_mul_comba_16, @function
+s_mp_mul_comba_16:
+.LFB4:
+ pushq %r12
+.LCFI7:
+ pushq %rbp
+.LCFI8:
+ pushq %rbx
+.LCFI9:
+ movq %rdx, %rbx
+ subq $136, %rsp
+.LCFI10:
+ movq 16(%rdi), %rax
+ movq (%rax), %r8
+ movq %r8, -120(%rsp)
+ movq 8(%rax), %rbp
+ movq %rbp, -112(%rsp)
+ movq 16(%rax), %r9
+ movq %r9, -104(%rsp)
+ movq 24(%rax), %r12
+ movq %r12, -96(%rsp)
+ movq 32(%rax), %rcx
+ movq %rcx, -88(%rsp)
+ movq 40(%rax), %r10
+ movq %r10, -80(%rsp)
+ movq 48(%rax), %rdx
+ movq %rdx, -72(%rsp)
+ movq 56(%rax), %r11
+ movq %r11, -64(%rsp)
+ movq 64(%rax), %r8
+ movq %r8, -56(%rsp)
+ movq 72(%rax), %rbp
+ movq %rbp, -48(%rsp)
+ movq 80(%rax), %r9
+ movq %r9, -40(%rsp)
+ movq 88(%rax), %r12
+ movq %r12, -32(%rsp)
+ movq 96(%rax), %rcx
+ movq %rcx, -24(%rsp)
+ movq 104(%rax), %r10
+ movq %r10, -16(%rsp)
+ movq 112(%rax), %rdx
+ movq %rdx, -8(%rsp)
+ movq 120(%rax), %r11
+ movq %r11, (%rsp)
+ movq 16(%rsi), %r11
+ movq (%r11), %r8
+ movq %r8, 8(%rsp)
+ movq 8(%r11), %rbp
+ movq %rbp, 16(%rsp)
+ movq 16(%r11), %r9
+ movq %r9, 24(%rsp)
+ movq 24(%r11), %r12
+ movq %r12, 32(%rsp)
+ movq 32(%r11), %rcx
+ movq %rcx, 40(%rsp)
+ movq 40(%r11), %r10
+ movq %r10, 48(%rsp)
+ movq 48(%r11), %rdx
+ movq %rdx, 56(%rsp)
+ movq 56(%r11), %rax
+ movq %rax, 64(%rsp)
+ movq 64(%r11), %r8
+ movq %r8, 72(%rsp)
+ movq 72(%r11), %rbp
+ movq %rbp, 80(%rsp)
+ movq 80(%r11), %r9
+ movq %r9, 88(%rsp)
+ movq 88(%r11), %r12
+ movq %r12, 96(%rsp)
+ movq 96(%r11), %rcx
+ movq %rcx, 104(%rsp)
+ movq 104(%r11), %r10
+ movq %r10, 112(%rsp)
+ movq 112(%r11), %rdx
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %rdx, 120(%rsp)
+ movq 120(%r11), %rax
+ movq %rax, 128(%rsp)
+ movq 16(%rbx), %r11
+/APP
+ movq -120(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -120(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -112(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -120(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -104(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -96(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -88(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -80(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -72(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 48(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 56(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -56(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 64(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -48(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 72(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 80(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -32(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 88(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -24(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 96(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -16(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 104(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -8(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 112(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 120(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 128(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -104(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 136(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -96(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 144(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -88(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 152(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -80(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 160(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -72(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 168(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -64(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 176(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -56(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 184(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -48(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 192(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -40(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 200(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -32(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 208(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -24(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 216(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -16(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 224(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -8(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq (%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 232(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq (%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 240(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 248(%r11)
+ movl $32, 8(%rbx)
+ jne .L76
+ .align 16
+.L84:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L76
+ leal -2(%rdx), %eax
+ cmpq $0, (%r11,%rax,8)
+ je .L84
+.L76:
+ movl 8(%rbx), %edx
+ xorl %r11d, %r11d
+ testl %edx, %edx
+ cmovne %esi, %r11d
+ movl %r11d, (%rbx)
+ addq $136, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE4:
+ .size s_mp_mul_comba_16, .-s_mp_mul_comba_16
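
Note how operand staging scales with size: s_mp_mul_comba_4 copies both operands limb by limb into slots that fit entirely within the 128-byte System V red zone below %rsp, s_mp_mul_comba_8 needs only 8 extra bytes of stack, s_mp_mul_comba_16 grows the frame by 136 bytes, and s_mp_mul_comba_32 below gives up on per-limb movq copies altogether, allocating a 520-byte frame and bulk-copying 256 bytes per operand with memcpy@PLT. Staging first makes every mulq operand a fixed displacement from %rsp or %rbp. The C equivalent is a plain copy (DP() is the digit-pointer accessor from mpi.h; the buffer names are illustrative):

    #include <string.h>

    #include "mpi.h" /* for mp_int, mp_digit and the DP() accessor */

    /* Stage the 32-limb operands into local buffers, as the two memcpy
       calls in s_mp_mul_comba_32 do (32 limbs * 8 bytes = 256 bytes). */
    static void stage_operands_32(const mp_int *a, const mp_int *b,
                                  mp_digit ta[32], mp_digit tb[32])
    {
        memcpy(ta, DP(a), 32 * sizeof(mp_digit));
        memcpy(tb, DP(b), 32 * sizeof(mp_digit));
    }

Working from contiguous local copies keeps the column loops independent of how the caller allocated the digit arrays and lets every load use a short rbp-relative address.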
+ .align 16
+.globl s_mp_mul_comba_32
+ .type s_mp_mul_comba_32, @function
+s_mp_mul_comba_32:
+.LFB5:
+ pushq %rbp
+.LCFI11:
+ movq %rsp, %rbp
+.LCFI12:
+ pushq %r13
+.LCFI13:
+ movq %rdx, %r13
+ movl $256, %edx
+ pushq %r12
+.LCFI14:
+ movq %rsi, %r12
+ pushq %rbx
+.LCFI15:
+ movq %rdi, %rbx
+ subq $520, %rsp
+.LCFI16:
+ movq 16(%rdi), %rsi
+ leaq -544(%rbp), %rdi
+ call memcpy@PLT
+ movq 16(%r12), %rsi
+ leaq -288(%rbp), %rdi
+ movl $256, %edx
+ call memcpy@PLT
+ movq 16(%r13), %r9
+ xorl %r8d, %r8d
+ movq %r8, %rsi
+ movq %r8, %rdi
+ movq %r8, %r10
+/APP
+ movq -544(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, (%r9)
+ movq %r10, %rsi
+ movq %r8, %r10
+/APP
+ movq -544(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r10, %r11
+/APP
+ movq -536(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, 8(%r9)
+ movq %r11, %rdi
+ movq %r8, %r11
+/APP
+ movq -544(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %rcx
+/APP
+ movq -536(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -528(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 16(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -520(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 24(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -512(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 32(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -504(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 40(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -496(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 48(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -488(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 56(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -480(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 64(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -472(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 72(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -464(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 80(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -456(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 88(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -448(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 96(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -440(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 104(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -432(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 112(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -424(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 120(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -416(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 128(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -408(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 136(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -400(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 144(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -392(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 152(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -384(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 160(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -376(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 168(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
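+/ Between columns the accumulator rotates one word down: after the low
+/ word is written to the result, the middle word (staged in %r10)
+/ becomes the new low word, the carry word (staged in %r11) becomes the
+/ new middle word, and %r8 (zero) becomes the new carry. The low word
+/ therefore alternates between %rcx and %rsi from one column to the
+/ next, and the /NO_APP moves issued just before each column's final
+/ product park the upper two accumulator words in %r10/%r11 so they
+/ survive into this shuffle.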
+/APP
+ movq -544(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -368(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 176(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -360(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 184(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -352(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 192(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -344(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 200(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -336(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 208(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -328(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 216(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -320(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 224(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -312(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 232(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -304(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 240(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 248(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
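+/ The column stored at 248(%r9) above was the widest, running from
+/ a[0]*b[31] through a[31]*b[0]; from here the columns shrink again.
+/ Column k > 31 starts at a[k-31]*b[31] -- note the first load below is
+/ -536(%rbp), i.e. a[1] under the naming used above -- and each column
+/ drops one product.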
+/APP
+ movq -536(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 256(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -528(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 264(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -520(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 272(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -512(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 280(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -504(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 288(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -496(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 296(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -488(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 304(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -480(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 312(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -472(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 320(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -464(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 328(%r9)
+ movq %r11, %rdi
+ movq %r10, %r11
+ movq %r8, %r10
+/APP
+ movq -456(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -448(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -440(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -432(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -424(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -416(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -408(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -400(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -392(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -384(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -376(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -368(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -360(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -352(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -344(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -336(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -328(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -320(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -312(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -304(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -296(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r11, 336(%r9)
+ movq %r10, %rsi
+ movq %r8, %r10
+/APP
+ movq -448(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r10, %rcx
+/APP
+ movq -440(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rsi, %r11
+ movq %rcx, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%r11
+ adcq $0,%r10
+
+/NO_APP
+ movq %rdi, 344(%r9)
+ movq %r11, %rcx
+ movq %r10, %rdi
+ movq %r8, %r11
+/APP
+ movq -440(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %rsi
+/APP
+ movq -432(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 352(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -432(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 360(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -424(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 368(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -416(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 376(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -408(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 384(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -400(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 392(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -392(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 400(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -384(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 408(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -376(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 416(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -368(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 424(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -360(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 432(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -352(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 440(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -344(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 448(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -336(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 456(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -328(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 464(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -320(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 472(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -312(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 480(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -304(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rcx, %r11
+ movq %rdi, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 488(%r9)
+ movq %r10, %rcx
+ movq %r11, %rsi
+/APP
+ movq -296(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rsi
+ adcq $0,%r8
+
+/NO_APP
+ movq %rcx, 496(%r9)
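+/ Epilogue: the product's sign is the XOR of the two operands' sign
+/ fields; used starts at the full 64 limbs and the .L157 loop clamps it
+/ past leading zero limbs.  A zero result forces the sign back to 0
+/ (ZPOS) via the cmovne before returning.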
+ movl (%r12), %ecx
+ xorl (%rbx), %ecx
+ testq %rsi, %rsi
+ movq %rsi, 504(%r9)
+ movl $64, 8(%r13)
+ jne .L149
+ .align 16
+.L157:
+ movl 8(%r13), %edx
+ leal -1(%rdx), %ebx
+ testl %ebx, %ebx
+ movl %ebx, 8(%r13)
+ je .L149
+ leal -2(%rdx), %r12d
+ cmpq $0, (%r9,%r12,8)
+ je .L157
+.L149:
+ movl 8(%r13), %r9d
+ xorl %edx, %edx
+ testl %r9d, %r9d
+ cmovne %ecx, %edx
+ movl %edx, (%r13)
+ addq $520, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ leave
+ ret
+.LFE5:
+ .size s_mp_mul_comba_32, .-s_mp_mul_comba_32
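+
+/ s_mp_sqr_comba_4 squares a 4-limb operand with the same column-wise
+/ Comba scheme.  An off-diagonal product a[i]*a[j] (i != j) belongs to
+/ the square twice, so each one is accumulated with a doubled add/adc
+/ sequence, while the diagonal squares a[i]*a[i] are added once.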
+ .align 16
+.globl s_mp_sqr_comba_4
+ .type s_mp_sqr_comba_4, @function
+s_mp_sqr_comba_4:
+.LFB6:
+ pushq %rbp
+.LCFI17:
+ movq %rsi, %r11
+ xorl %esi, %esi
+ movq %rsi, %r10
+ movq %rsi, %rbp
+ movq %rsi, %r8
+ pushq %rbx
+.LCFI18:
+ movq %rsi, %rbx
+ movq 16(%rdi), %rcx
+ movq %rsi, %rdi
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, -72(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbx, -64(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+
+/NO_APP
+ movq %rbp, %rbx
+ movq %r8, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rdi, -56(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %r8
+ movq %rsi, %rdi
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r9, %rbx
+ movq %rdi, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, -48(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %rdi
+ movq %rsi, %r8
+ movl $8, 8(%r11)
+ movl $0, (%r11)
+/APP
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbx
+ movq %r8, %rbp
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %rax
+ movq %rdi, -40(%rsp)
+ movq %rbx, %rbp
+ movq %rax, %rdi
+ movq %rsi, %rbx
+/APP
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rbp, -32(%rsp)
+ movq %rbx, %r9
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%rsi
+
+/NO_APP
+ movq 16(%r11), %rdx
+ movq %rdi, -24(%rsp)
+ movq %r9, -16(%rsp)
+ movq %r10, (%rdx)
+ movq -64(%rsp), %r8
+ movq %r8, 8(%rdx)
+ movq -56(%rsp), %rbp
+ movq %rbp, 16(%rdx)
+ movq -48(%rsp), %rdi
+ movq %rdi, 24(%rdx)
+ movq -40(%rsp), %rsi
+ movq %rsi, 32(%rdx)
+ movq -32(%rsp), %rbx
+ movq %rbx, 40(%rdx)
+ movq -24(%rsp), %rcx
+ movq %rcx, 48(%rdx)
+ movq -16(%rsp), %rax
+ movq %rax, 56(%rdx)
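+/ Clamp: strip leading zero limbs from the result and update the used
+/ count before returning MP_OKAY.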
+ movl 8(%r11), %edx
+ testl %edx, %edx
+ je .L168
+ leal -1(%rdx), %ecx
+ movq 16(%r11), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L166
+ movl %ecx, %edx
+ .align 16
+.L167:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L171
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L167
+ movl %ecx, 8(%r11)
+ movl %ecx, %edx
+.L166:
+ testl %edx, %edx
+ je .L168
+ popq %rbx
+ popq %rbp
+ movl (%r11), %eax
+ movl %eax, (%r11)
+ ret
+.L171:
+ movl %edx, 8(%r11)
+ .align 16
+.L168:
+ popq %rbx
+ popq %rbp
+ xorl %eax, %eax
+ movl %eax, (%r11)
+ ret
+.LFE6:
+ .size s_mp_sqr_comba_4, .-s_mp_sqr_comba_4
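+
+/ s_mp_sqr_comba_8: 8-limb Comba square.  The 16 result limbs are staged
+/ in stack scratch at -120(%rsp)..(%rsp) and copied out to dp[] in one
+/ pass at the end, after which used is clamped as above.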
+ .align 16
+.globl s_mp_sqr_comba_8
+ .type s_mp_sqr_comba_8, @function
+s_mp_sqr_comba_8:
+.LFB7:
+ pushq %r14
+.LCFI19:
+ xorl %r9d, %r9d
+ movq %r9, %r14
+ movq %r9, %r10
+ pushq %r13
+.LCFI20:
+ movq %r9, %r13
+ pushq %r12
+.LCFI21:
+ movq %r9, %r12
+ pushq %rbp
+.LCFI22:
+ movq %rsi, %rbp
+ movq %r9, %rsi
+ pushq %rbx
+.LCFI23:
+ movq %r9, %rbx
+ subq $8, %rsp
+.LCFI24:
+ movq 16(%rdi), %rcx
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r14
+ adcq %rdx,%rbx
+ adcq $0,%r12
+
+/NO_APP
+ movq %r14, -120(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+
+/NO_APP
+ movq %rbx, -112(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %rbx
+ movq %r13, %r10
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r12, -104(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %r11
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %r10
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r9, %rsi
+ movq %r11, -96(%rsp)
+ movq %r10, %r8
+ movq %rbx, %r12
+ movq %r9, %r11
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r13, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r12, -88(%rsp)
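+/ Columns with several distinct cross products sum them once into the
+/ scratch triple %r8:%rdi:%rsi and then add that triple into the column
+/ accumulator twice, doubling every off-diagonal term with one carry
+/ chain instead of doubling each product separately.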
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -80(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -72(%rsp)
+ movq %r11, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+
+/NO_APP
+ movq %rbx, -64(%rsp)
+ movq %rax, %r11
+ movq %r9, %rbx
+/APP
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rsi
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %r13
+ movq %rsi, %r11
+/APP
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -56(%rsp)
+ movq %r9, %r10
+/APP
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %r13,%r13
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r13, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+ movq %rbx, -48(%rsp)
+ movq %r11, %r12
+ movq %r10, %rsi
+ movq %r9, %rbx
+ movq %r9, %r11
+/APP
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rbx, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+ movq %r9, %r13
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r12, -40(%rsp)
+ movq %rbx, %r8
+ movq %r10, %rdi
+/APP
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r10
+ movq %r11, %rbx
+/APP
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rdi, -32(%rsp)
+ movq %rbx, %rsi
+ movq %r10, %r12
+/APP
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r12, -24(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %rsi
+ movq %r9, %r10
+ movl $16, 8(%rbp)
+ movl $0, (%rbp)
+/APP
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %rdi, -16(%rsp)
+ movq %r10, %r8
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+ movq 16(%rbp), %rax
+ movq %rsi, -8(%rsp)
+ movq %r8, (%rsp)
+ movq %r14, (%rax)
+ movq -112(%rsp), %rbx
+ movq %rbx, 8(%rax)
+ movq -104(%rsp), %rcx
+ movq %rcx, 16(%rax)
+ movq -96(%rsp), %rdx
+ movq %rdx, 24(%rax)
+ movq -88(%rsp), %r14
+ movq %r14, 32(%rax)
+ movq -80(%rsp), %r13
+ movq %r13, 40(%rax)
+ movq -72(%rsp), %r12
+ movq %r12, 48(%rax)
+ movq -64(%rsp), %r11
+ movq %r11, 56(%rax)
+ movq -56(%rsp), %r10
+ movq %r10, 64(%rax)
+ movq -48(%rsp), %r9
+ movq %r9, 72(%rax)
+ movq -40(%rsp), %r8
+ movq %r8, 80(%rax)
+ movq -32(%rsp), %rdi
+ movq %rdi, 88(%rax)
+ movq -24(%rsp), %rsi
+ movq %rsi, 96(%rax)
+ movq -16(%rsp), %rbx
+ movq %rbx, 104(%rax)
+ movq -8(%rsp), %rcx
+ movq %rcx, 112(%rax)
+ movq (%rsp), %rdx
+ movq %rdx, 120(%rax)
+ movl 8(%rbp), %edx
+ testl %edx, %edx
+ je .L192
+ leal -1(%rdx), %ecx
+ movq 16(%rbp), %rsi
+ mov %ecx, %r14d
+ cmpq $0, (%rsi,%r14,8)
+ jne .L190
+ movl %ecx, %edx
+ .align 16
+.L191:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L195
+ decl %edx
+ mov %edx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ je .L191
+ movl %ecx, 8(%rbp)
+ movl %ecx, %edx
+.L190:
+ testl %edx, %edx
+ je .L192
+ movl (%rbp), %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.L195:
+ movl %edx, 8(%rbp)
+ .align 16
+.L192:
+ xorl %eax, %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.LFE7:
+ .size s_mp_sqr_comba_8, .-s_mp_sqr_comba_8
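+
+/ s_mp_sqr_comba_16: 16-limb Comba square with the same structure as the
+/ 8-limb version, using a 256-byte stack area for the 32 staged result
+/ limbs.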
+ .align 16
+.globl s_mp_sqr_comba_16
+ .type s_mp_sqr_comba_16, @function
+s_mp_sqr_comba_16:
+.LFB8:
+ pushq %rbp
+.LCFI25:
+ xorl %r9d, %r9d
+ movq %r9, %r8
+ movq %r9, %r11
+ movq %rsp, %rbp
+.LCFI26:
+ pushq %r14
+.LCFI27:
+ movq %rsi, %r14
+ movq %r9, %rsi
+ pushq %r13
+.LCFI28:
+ movq %r9, %r13
+ pushq %r12
+.LCFI29:
+ movq %r9, %r12
+ pushq %rbx
+.LCFI30:
+ movq %r9, %rbx
+ subq $256, %rsp
+.LCFI31:
+ movq 16(%rdi), %rcx
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, -288(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -280(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r10
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, -272(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rsi
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r11, %rbx
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r9, %r11
+ movq %r10, -264(%rbp)
+ movq %rbx, %r8
+ movq %r12, %r13
+ movq %r9, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r12, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r13, -256(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -248(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -240(%rbp)
+ movq %r11, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rbx, -232(%rbp)
+ movq %r9, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 64(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r13, %rdi
+ movq %r10, -224(%rbp)
+ movq %r12, %rsi
+ movq %rbx, %r10
+ movq %r9, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 72(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %r11, -216(%rbp)
+ movq %r12, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 80(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%rbx
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%rbx
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %rbx, %r11
+ movq %r13, %rdi
+ movq %rdx, %rbx
+ movq %r12, %rsi
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r10, -208(%rbp)
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 88(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r13
+ movq %r11, -200(%rbp)
+ movq %r13, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 96(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rdx
+ movq %rsi, %r11
+/APP
+ addq %r8,%r10
+ adcq %rdx,%r12
+ adcq %r11,%rax
+ addq %r8,%r10
+ adcq %rdx,%r12
+ adcq %r11,%rax
+
+/NO_APP
+ movq %rdx, %rbx
+ movq %rax, %r13
+ movq %r11, %rsi
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, %rdi
+ movq %r10, -192(%rbp)
+ movq %r13, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 104(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -184(%rbp)
+ movq %r13, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 112(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rbx
+ movq %rsi, %rdx
+/APP
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r13
+ movq %rbx, %rdi
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r10, -176(%rbp)
+ movq %r13, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -168(%rbp)
+ movq %r13, %r12
+/APP
+ movq 8(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rbx
+ movq %rsi, %rdx
+/APP
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r13
+ movq %rbx, %rdi
+/APP
+ movq 64(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r10, -160(%rbp)
+ movq %r9, %r11
+/APP
+ movq 16(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r13, %r10
+ movq %r9, %rbx
+/APP
+ movq 24(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %r12, -152(%rbp)
+/APP
+ movq 24(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r9, %r12
+/APP
+ movq 72(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -144(%rbp)
+ movq %r11, %r10
+/APP
+ movq 32(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -136(%rbp)
+ movq %r12, %r11
+/APP
+ movq 40(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 80(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -128(%rbp)
+ movq %r11, %r10
+/APP
+ movq 48(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rbx, -120(%rbp)
+ movq %rdx, %r11
+ movq %r9, %rbx
+/APP
+ movq 56(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r9, %r12
+/APP
+ movq 88(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -112(%rbp)
+ movq %r11, %r10
+/APP
+ movq 64(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -104(%rbp)
+ movq %r12, %r11
+/APP
+ movq 72(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 96(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -96(%rbp)
+ movq %r9, %r10
+/APP
+ movq 80(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r12
+ movq %rsi, %rax
+ movq %r9, %rsi
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+ movq %r9, %r12
+ movq %rbx, -88(%rbp)
+ movq %r11, %r13
+ movq %r10, %r11
+/APP
+ movq 88(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rdi
+/APP
+ movq 96(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%rdi
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r11, %rbx
+ movq %rdi, %r10
+ movq %r9, %r11
+/APP
+ movq 104(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r13, -80(%rbp)
+ movq %r10, %r8
+ movq %rbx, %r10
+/APP
+ movq 96(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r8
+ adcq $0,%rsi
+ addq %rax,%r10
+ adcq %rdx,%r8
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %r12
+ movq %rsi, %rbx
+/APP
+ movq 104(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r10, -72(%rbp)
+ movq %rbx, %r13
+ movq %r12, %rbx
+/APP
+ movq 104(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r13
+ adcq $0,%r11
+ addq %rax,%rbx
+ adcq %rdx,%r13
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %r12
+ movq %r13, %r10
+/APP
+ movq 112(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r10
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -64(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rbx
+ movq %r12, %rsi
+/APP
+ movq 112(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rdi, -56(%rbp)
+ movq %rbx, %r8
+/APP
+ movq 120(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+ movq %rsi, -48(%rbp)
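+/ All output columns are done (the top carry limb in %r8 is stored to
+/ -40(%rbp) just below, interleaved with the memcpy setup).  The
+/ 256-byte stack buffer is copied into the target's digit array; the
+/ offsets assume mpi's mp_int layout: sign at 0(%r14), used at
+/ 8(%r14), dp at 16(%r14).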
+ movq 16(%r14), %rdi
+ leaq -288(%rbp), %rsi
+ movl $256, %edx
+ movq %r8, -40(%rbp)
+ movl $32, 8(%r14)
+ movl $0, (%r14)
+ call memcpy@PLT
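+/ Inlined clamp (cf. s_mp_clamp): scan down from the top limb,
+/ shrinking used past any leading zero limbs, and force the sign of a
+/ zero result back to positive before returning.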
+ movl 8(%r14), %edx
+ testl %edx, %edx
+ je .L232
+ leal -1(%rdx), %ecx
+ movq 16(%r14), %rsi
+ mov %ecx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ jne .L230
+ movl %ecx, %edx
+ .align 16
+.L231:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L235
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L231
+ movl %ecx, 8(%r14)
+ movl %ecx, %edx
+.L230:
+ testl %edx, %edx
+ je .L232
+ movl (%r14), %eax
+ movl %eax, (%r14)
+ addq $256, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.L235:
+ movl %edx, 8(%r14)
+ .align 16
+.L232:
+ xorl %eax, %eax
+ movl %eax, (%r14)
+ addq $256, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.LFE8:
+ .size s_mp_sqr_comba_16, .-s_mp_sqr_comba_16
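+/ s_mp_sqr_comba_32 below repeats the same column-by-column scheme,
+/ widened to 32 input limbs (64 output limbs staged in 512 bytes of
+/ stack).  A rough C sketch of what both routines compute -- with
+/ hypothetical names, not the source this file was generated from:
+/
+/     for (k = 0; k <= 2*n - 2; k++) {               / one column
+/         for (i = (k < n) ? 0 : k - n + 1; 2*i < k; i++)
+/             acc += 2 * (double_limb)a[i] * a[k - i];
+/         if ((k & 1) == 0)                          / square term
+/             acc += (double_limb)a[k/2] * a[k/2];
+/         tmp[k] = (limb)acc;                        / emit low limb,
+/         acc >>= LIMB_BITS;                         / shift the triple
+/     }
+/     tmp[2*n - 1] = (limb)acc;                      / final carry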
+ .align 16
+.globl s_mp_sqr_comba_32
+ .type s_mp_sqr_comba_32, @function
+s_mp_sqr_comba_32:
+.LFB9:
+ pushq %rbp
+.LCFI32:
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r11
+ movq %rsp, %rbp
+.LCFI33:
+ pushq %r14
+.LCFI34:
+ movq %rsi, %r14
+ movq %r10, %rsi
+ pushq %r13
+.LCFI35:
+ movq %r10, %r13
+ pushq %r12
+.LCFI36:
+ movq %r10, %r12
+ pushq %rbx
+.LCFI37:
+ movq %r10, %rbx
+ subq $512, %rsp
+.LCFI38:
+ movq 16(%rdi), %rcx
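+/ Prologue: %rdi is the source mp_int and %rsi (saved in %r14) the
+/ target.  %rcx <- source digit pointer; %r10 stays zero throughout
+/ and is copied into registers that must restart as empty carries; the
+/ 64 output limbs are staged from -544(%rbp) upward.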
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, -544(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -536(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r9
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %rsi, -528(%rbp)
+ movq %r9, %rdi
+ movq %r10, %rsi
+ movq %rbx, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r9
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r9
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r11, %r13
+ movq %r10, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %r11
+ movq %r9, -520(%rbp)
+ movq %r13, %r8
+ movq %r12, %r13
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r12, %r9
+ movq %r10, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %r13, -512(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -504(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -496(%rbp)
+ movq %r11, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rbx, -488(%rbp)
+ movq %r10, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 64(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r13, %rdi
+ movq %r9, -480(%rbp)
+ movq %r12, %rsi
+ movq %rbx, %r9
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 72(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %r11, -472(%rbp)
+ movq %r12, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 80(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %rbx, %r11
+ movq %r13, %rdi
+ movq %rdx, %rbx
+ movq %r12, %rsi
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r9, -464(%rbp)
+ movq %rbx, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 88(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r13
+ movq %r11, -456(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 96(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rax
+ movq %rsi, %r11
+/APP
+ addq %r8,%r9
+ adcq %rax,%r12
+ adcq %r11,%r13
+ addq %r8,%r9
+ adcq %rax,%r12
+ adcq %r11,%r13
+
+/NO_APP
+ movq %rax, %rbx
+ movq %r11, %rsi
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, %rdi
+ movq %r9, -448(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 104(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -440(%rbp)
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 112(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %rdi, %rbx
+ movq %rsi, %r13
+/APP
+ addq %r8,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %r8,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %rdi
+ movq %r11, %rsi
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -432(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rax
+ movq %rdi, %rdx
+ movq %rsi, %rbx
+/APP
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+
+/NO_APP
+ movq %r12, -424(%rbp)
+ movq %rdx, %r8
+ movq %rax, %rsi
+ movq %rbx, %rdi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 128(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 64(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -416(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 136(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -408(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 144(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 72(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -400(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 152(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -392(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 160(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 80(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -384(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 168(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -376(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 176(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 88(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -368(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 184(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, -360(%rbp)
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 192(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+ addq %rsi,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r11
+ movq %rbx, %r8
+/APP
+ movq 96(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rdi
+ movq %r9, -352(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 200(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -344(%rbp)
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 208(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r13
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %r8
+ movq %r11, %rdi
+/APP
+ movq 104(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -336(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 216(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -328(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 224(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rax
+ movq %r10, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, %r11
+ movq %rbx, %r8
+ movq %rax, %r12
+ movq %rdi, %r13
+ movq %r11, %rdi
+/APP
+ movq 112(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -320(%rbp)
+ movq %r13, %rbx
+ movq %r10, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 232(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r9
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r9
+
+/NO_APP
+ movq %r12, -312(%rbp)
+ movq %r9, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 240(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, %rax
+ movq %r8, %r11
+ movq %rdi, %rdx
+/APP
+ addq %rsi,%rbx
+ adcq %r11,%r13
+ adcq %rdx,%rax
+ addq %rsi,%rbx
+ adcq %r11,%r13
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r9
+ movq %rax, %rdx
+ movq %r13, %r12
+ movq %r11, %r8
+ movq %rdx, %r13
+ movq %r9, %rdi
+/APP
+ movq 120(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, -304(%rbp)
+ movq %r13, %rbx
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -296(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
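+/ Column 31, just stored to -296(%rbp), is the widest (16 cross
+/ products, a[0]*a[31] through a[15]*a[16]).  From here the low index
+/ can no longer start at a[0], so each successive column carries one
+/ product fewer.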
+/APP
+ movq 8(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r8, %r11
+ movq %rdi, %rax
+/APP
+ addq %rsi,%rbx
+ adcq %r11,%r12
+ adcq %rax,%r13
+ addq %rsi,%rbx
+ adcq %r11,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r9
+ movq %r11, %r8
+/APP
+ movq 128(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, %rdi
+ movq %rbx, -288(%rbp)
+ movq %r13, %r9
+/APP
+ movq 16(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 24(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -280(%rbp)
+ movq %r10, %r12
+/APP
+ movq 24(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r13
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %r8
+ movq %r11, %rdi
+/APP
+ movq 136(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -272(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -264(%rbp)
+/APP
+ movq 40(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rax
+ movq %r10, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, %r11
+ movq %rbx, %r8
+ movq %rax, %r12
+ movq %rdi, %r13
+ movq %r11, %rdi
+/APP
+ movq 144(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %r11
+ movq %r9, -256(%rbp)
+ movq %r13, %r9
+/APP
+ movq 48(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r11
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r11
+
+/NO_APP
+ movq %r12, -248(%rbp)
+ movq %r11, %r13
+/APP
+ movq 56(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rsi, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rdx,%r9
+ adcq %rbx,%r13
+ adcq %r12,%rax
+ addq %rdx,%r9
+ adcq %rbx,%r13
+ adcq %r12,%rax
+
+/NO_APP
+ movq %r12, %r11
+ movq %rdx, %r8
+ movq %rax, %rdx
+ movq %r13, %r12
+ movq %rbx, %rdi
+ movq %rdx, %r13
+ movq %r11, %rsi
+/APP
+ movq 152(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -240(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq 64(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 104(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rax
+ movq %rdi, %rdx
+ movq %rsi, %rbx
+/APP
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+
+/NO_APP
+ movq %r12, -232(%rbp)
+ movq %rdx, %r8
+ movq %rax, %rsi
+ movq %rbx, %rdi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 72(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 160(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -224(%rbp)
+ movq %r13, %r9
+/APP
+ movq 80(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 88(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 104(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -216(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 88(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 168(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -208(%rbp)
+ movq %r13, %r9
+/APP
+ movq 96(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 104(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -200(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 104(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 168(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 176(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -192(%rbp)
+ movq %r13, %r9
+/APP
+ movq 112(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 120(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -184(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 120(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 168(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 176(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 184(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -176(%rbp)
+ movq %r13, %r9
+/APP
+ movq 128(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 136(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -168(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 136(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %rax
+/APP
+ addq %r8,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+ addq %r8,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r11
+ movq %rbx, %rdi
+ movq %r10, %rbx
+/APP
+ movq 192(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -160(%rbp)
+ movq %r13, %r9
+/APP
+ movq 144(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%rbx
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%rbx
+
+/NO_APP
+ movq %r12, -152(%rbp)
+/APP
+ movq 152(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rdx
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rax
+ movq %r13, %rdi
+ movq %r12, %rsi
+ movq %rax, %r11
+ movq %r10, %r12
+/APP
+ movq 200(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -144(%rbp)
+ movq %r11, %r9
+/APP
+ movq 160(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -136(%rbp)
+ movq %r12, %r11
+/APP
+ movq 168(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 208(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -128(%rbp)
+ movq %r11, %r9
+/APP
+ movq 176(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rbx, -120(%rbp)
+ movq %rdx, %r11
+ movq %r10, %rbx
+/APP
+ movq 184(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r10, %r12
+/APP
+ movq 216(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -112(%rbp)
+ movq %r11, %r9
+/APP
+ movq 192(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -104(%rbp)
+ movq %r12, %r11
+/APP
+ movq 200(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+ movq %r10, %r12
+/APP
+ movq 224(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -96(%rbp)
+ movq %r10, %r9
+/APP
+ movq 208(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 224(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r13,%r11
+ adcq %rax,%r9
+ addq %r8,%rbx
+ adcq %r13,%r11
+ adcq %rax,%r9
+
+/NO_APP
+ movq %rbx, -88(%rbp)
+ movq %r11, %rsi
+ movq %r9, %r8
+/APP
+ movq 216(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %r11
+/APP
+ movq 224(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r13
+ movq %r11, %rbx
+/APP
+ movq 232(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r13
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rsi, -80(%rbp)
+ movq %rbx, %r12
+ movq %r13, %rdi
+ movq %r10, %r13
+/APP
+ movq 224(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rdi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %r9
+ movq %r13, %r12
+/APP
+ movq 232(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r12
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %rdi, -72(%rbp)
+ movq %r9, %r11
+ movq %r12, %rbx
+ movq %r10, %r9
+/APP
+ movq 232(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r9
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %rbx, %r13
+ movq %r9, %rbx
+ movq %r10, %r9
+/APP
+ movq 240(%rcx),%rax
+ mulq %rax
+ addq %rax,%r11
+ adcq %rdx,%r13
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r11, -64(%rbp)
+ movq %r13, %rdi
+ movq %rbx, %rsi
+/APP
+ movq 240(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r9
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r9
+
+/NO_APP
+ movq %rdi, -56(%rbp)
+ movq %r9, %r8
+/APP
+ movq 248(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, -48(%rbp)
+ movq 16(%r14), %rdi
+ leaq -544(%rbp), %rsi
+ movl $512, %edx
+ movq %r8, -40(%rbp)
+ movl $64, 8(%r14)
+ movl $0, (%r14)
+ call memcpy@PLT
+ movl 8(%r14), %edx
+ testl %edx, %edx
+ je .L304
+ leal -1(%rdx), %ecx
+ movq 16(%r14), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L302
+ movl %ecx, %edx
+ .align 16
+.L303:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L307
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L303
+ movl %ecx, 8(%r14)
+ movl %ecx, %edx
+.L302:
+ testl %edx, %edx
+ je .L304
+ movl (%r14), %eax
+ movl %eax, (%r14)
+ addq $512, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.L307:
+ movl %edx, 8(%r14)
+ .align 16
+.L304:
+ xorl %eax, %eax
+ movl %eax, (%r14)
+ addq $512, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.LFE9:
+ .size s_mp_sqr_comba_32, .-s_mp_sqr_comba_32
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
new file mode 100644
index 0000000000..5be4da4bf2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
@@ -0,0 +1,73 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_PRIV_H_
+#define _MP_GF2M_PRIV_H_
+
+#include "mpi-priv.h"
+
+extern const mp_digit mp_gf2m_sqr_tb[16];
+
+#if defined(MP_USE_UINT_DIGIT)
+#define MP_DIGIT_BITS 32
+/* enable fast divide and mod operations by MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 5
+#define MP_DIGIT_BITS_MASK 0x1f
+#else
+#define MP_DIGIT_BITS 64
+/* enable fast divide and mod operations by MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 6
+#define MP_DIGIT_BITS_MASK 0x3f
+#endif
+
+/* Platform-specific macros for fast binary polynomial squaring. */
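+/* Squaring over GF(2) has no cross terms: it amounts to interleaving a zero
+ * bit between adjacent bits of the operand. mp_gf2m_sqr_tb maps each 4-bit
+ * nibble to its 8-bit spread form (e.g. 0xF -> 0x55); gf2m_SQR1 and
+ * gf2m_SQR0 build the high and low halves of a squared digit from it. */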
+#if MP_DIGIT_BITS == 32
+#define gf2m_SQR1(w) \
+ mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF]
+#define gf2m_SQR0(w) \
+ mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF]
+#else
+#define gf2m_SQR1(w) \
+ mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
+ mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
+ mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF]
+#define gf2m_SQR0(w) \
+ mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
+ mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
+ mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF]
+#endif
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl.
+ */
+void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b);
+
+/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
+ * result is a binary polynomial in 4 mp_digits r[4].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+ const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
+ * result is a binary polynomial in 6 mp_digits r[6].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+ const mp_digit b2, const mp_digit b1, const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
+ * result is a binary polynomial in 8 mp_digits r[8].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+ const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+ const mp_digit b0);
+
+#endif /* _MP_GF2M_PRIV_H_ */
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c
new file mode 100644
index 0000000000..878b7cae8c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.c
@@ -0,0 +1,677 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mp_gf2m.h"
+#include "mp_gf2m-priv.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+const mp_digit mp_gf2m_sqr_tb[16] = {
+ 0, 1, 4, 5, 16, 17, 20, 21,
+ 64, 65, 68, 69, 80, 81, 84, 85
+};
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl.
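+ * Implemented as a left-to-right comb: small multiples of a (with its top
+ * bits masked off so table entries cannot overflow) are precomputed in
+ * tab[], b is consumed a few bits at a time, and the masked-off top bits
+ * of a are compensated for at the end.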
+ */
+#if MP_DIGIT_BITS == 32
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+ register mp_digit h, l, s;
+ mp_digit tab[8], top2b = a >> 30;
+ register mp_digit a1, a2, a4;
+
+ a1 = a & (0x3FFFFFFF);
+ a2 = a1 << 1;
+ a4 = a2 << 1;
+
+ tab[0] = 0;
+ tab[1] = a1;
+ tab[2] = a2;
+ tab[3] = a1 ^ a2;
+ tab[4] = a4;
+ tab[5] = a1 ^ a4;
+ tab[6] = a2 ^ a4;
+ tab[7] = a1 ^ a2 ^ a4;
+
+ s = tab[b & 0x7];
+ l = s;
+ s = tab[b >> 3 & 0x7];
+ l ^= s << 3;
+ h = s >> 29;
+ s = tab[b >> 6 & 0x7];
+ l ^= s << 6;
+ h ^= s >> 26;
+ s = tab[b >> 9 & 0x7];
+ l ^= s << 9;
+ h ^= s >> 23;
+ s = tab[b >> 12 & 0x7];
+ l ^= s << 12;
+ h ^= s >> 20;
+ s = tab[b >> 15 & 0x7];
+ l ^= s << 15;
+ h ^= s >> 17;
+ s = tab[b >> 18 & 0x7];
+ l ^= s << 18;
+ h ^= s >> 14;
+ s = tab[b >> 21 & 0x7];
+ l ^= s << 21;
+ h ^= s >> 11;
+ s = tab[b >> 24 & 0x7];
+ l ^= s << 24;
+ h ^= s >> 8;
+ s = tab[b >> 27 & 0x7];
+ l ^= s << 27;
+ h ^= s >> 5;
+ s = tab[b >> 30];
+ l ^= s << 30;
+ h ^= s >> 2;
+
+ /* compensate for the top two bits of a */
+
+ if (top2b & 01) {
+ l ^= b << 30;
+ h ^= b >> 2;
+ }
+ if (top2b & 02) {
+ l ^= b << 31;
+ h ^= b >> 1;
+ }
+
+ *rh = h;
+ *rl = l;
+}
+#else
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+ register mp_digit h, l, s;
+ mp_digit tab[16], top3b = a >> 61;
+ register mp_digit a1, a2, a4, a8;
+
+ a1 = a & (0x1FFFFFFFFFFFFFFFULL);
+ a2 = a1 << 1;
+ a4 = a2 << 1;
+ a8 = a4 << 1;
+ tab[0] = 0;
+ tab[1] = a1;
+ tab[2] = a2;
+ tab[3] = a1 ^ a2;
+ tab[4] = a4;
+ tab[5] = a1 ^ a4;
+ tab[6] = a2 ^ a4;
+ tab[7] = a1 ^ a2 ^ a4;
+ tab[8] = a8;
+ tab[9] = a1 ^ a8;
+ tab[10] = a2 ^ a8;
+ tab[11] = a1 ^ a2 ^ a8;
+ tab[12] = a4 ^ a8;
+ tab[13] = a1 ^ a4 ^ a8;
+ tab[14] = a2 ^ a4 ^ a8;
+ tab[15] = a1 ^ a2 ^ a4 ^ a8;
+
+ s = tab[b & 0xF];
+ l = s;
+ s = tab[b >> 4 & 0xF];
+ l ^= s << 4;
+ h = s >> 60;
+ s = tab[b >> 8 & 0xF];
+ l ^= s << 8;
+ h ^= s >> 56;
+ s = tab[b >> 12 & 0xF];
+ l ^= s << 12;
+ h ^= s >> 52;
+ s = tab[b >> 16 & 0xF];
+ l ^= s << 16;
+ h ^= s >> 48;
+ s = tab[b >> 20 & 0xF];
+ l ^= s << 20;
+ h ^= s >> 44;
+ s = tab[b >> 24 & 0xF];
+ l ^= s << 24;
+ h ^= s >> 40;
+ s = tab[b >> 28 & 0xF];
+ l ^= s << 28;
+ h ^= s >> 36;
+ s = tab[b >> 32 & 0xF];
+ l ^= s << 32;
+ h ^= s >> 32;
+ s = tab[b >> 36 & 0xF];
+ l ^= s << 36;
+ h ^= s >> 28;
+ s = tab[b >> 40 & 0xF];
+ l ^= s << 40;
+ h ^= s >> 24;
+ s = tab[b >> 44 & 0xF];
+ l ^= s << 44;
+ h ^= s >> 20;
+ s = tab[b >> 48 & 0xF];
+ l ^= s << 48;
+ h ^= s >> 16;
+ s = tab[b >> 52 & 0xF];
+ l ^= s << 52;
+ h ^= s >> 12;
+ s = tab[b >> 56 & 0xF];
+ l ^= s << 56;
+ h ^= s >> 8;
+ s = tab[b >> 60];
+ l ^= s << 60;
+ h ^= s >> 4;
+
+ /* compensate for the top three bits of a */
+
+ if (top3b & 01) {
+ l ^= b << 61;
+ h ^= b >> 3;
+ }
+ if (top3b & 02) {
+ l ^= b << 62;
+ h ^= b >> 2;
+ }
+ if (top3b & 04) {
+ l ^= b << 63;
+ h ^= b >> 1;
+ }
+
+ *rh = h;
+ *rl = l;
+}
+#endif
+
+/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
+ * result is a binary polynomial in 4 mp_digits r[4].
+ * The caller MUST ensure that r has the right amount of space allocated.
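+ * One level of Karatsuba over GF(2): three 1x1 multiplies (a1*b1, a0*b0
+ * and (a0^a1)*(b0^b1)) replace four, and the middle words are corrected
+ * with XORs since polynomial addition has no carries.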
+ */
+void
+s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+ const mp_digit b0)
+{
+ mp_digit m1, m0;
+ /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
+ s_bmul_1x1(r + 3, r + 2, a1, b1);
+ s_bmul_1x1(r + 1, r, a0, b0);
+ s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
+ /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
+ r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
+ r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
+}
+
+/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
+ * result is a binary polynomial in 6 mp_digits r[6].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+ const mp_digit b2, const mp_digit b1, const mp_digit b0)
+{
+ mp_digit zm[4];
+
+ s_bmul_1x1(r + 5, r + 4, a2, b2); /* fill top 2 words */
+ s_bmul_2x2(zm, a1, a2 ^ a0, b1, b2 ^ b0); /* fill middle 4 words */
+ s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */
+
+ zm[3] ^= r[3];
+ zm[2] ^= r[2];
+ zm[1] ^= r[1] ^ r[5];
+ zm[0] ^= r[0] ^ r[4];
+
+ r[5] ^= zm[3];
+ r[4] ^= zm[2];
+ r[3] ^= zm[1];
+ r[2] ^= zm[0];
+}
+
+/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
+ * result is a binary polynomial in 8 mp_digits r[8].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+ const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+ const mp_digit b0)
+{
+ mp_digit zm[4];
+
+ s_bmul_2x2(r + 4, a3, a2, b3, b2); /* fill top 4 words */
+ s_bmul_2x2(zm, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* fill middle 4 words */
+ s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */
+
+ zm[3] ^= r[3] ^ r[7];
+ zm[2] ^= r[2] ^ r[6];
+ zm[1] ^= r[1] ^ r[5];
+ zm[0] ^= r[0] ^ r[4];
+
+ r[5] ^= zm[3];
+ r[4] ^= zm[2];
+ r[3] ^= zm[1];
+ r[2] ^= zm[0];
+}
+
+/* Compute addition of two binary polynomials a and b,
+ * store result in c; c could be a or b, a and b could be equal;
+ * c is the bitwise XOR of a and b.
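+ * Addition in GF(2)[t] carries nothing, so for example
+ * (t^3 + t + 1) + (t + 1) = t^3, i.e. 0xB ^ 0x3 = 0x8.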
+ */
+mp_err
+mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+ mp_size ix;
+ mp_size used_pa, used_pb;
+ mp_err res = MP_OKAY;
+
+ /* Add all digits up to the precision of b. If b had more
+ * precision than a initially, swap a, b first
+ */
+ if (MP_USED(a) >= MP_USED(b)) {
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ used_pa = MP_USED(a);
+ used_pb = MP_USED(b);
+ } else {
+ pa = MP_DIGITS(b);
+ pb = MP_DIGITS(a);
+ used_pa = MP_USED(b);
+ used_pb = MP_USED(a);
+ }
+
+ /* Make sure c has enough precision for the output value */
+ MP_CHECKOK(s_mp_pad(c, used_pa));
+
+ /* Do word-by-word xor */
+ pc = MP_DIGITS(c);
+ for (ix = 0; ix < used_pb; ix++) {
+ (*pc++) = (*pa++) ^ (*pb++);
+ }
+
+ /* Finish the rest of digits until we're actually done */
+ for (; ix < used_pa; ++ix) {
+ *pc++ = *pa++;
+ }
+
+ MP_USED(c) = used_pa;
+ MP_SIGN(c) = ZPOS;
+ s_mp_clamp(c);
+
+CLEANUP:
+ return res;
+}
+
+#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1));
+
+/* Compute binary polynomial multiply d = a * b */
+static void
+s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+ mp_digit a_i, a0b0, a1b1, carry = 0;
+ while (a_len--) {
+ a_i = *a++;
+ s_bmul_1x1(&a1b1, &a0b0, a_i, b);
+ *d++ = a0b0 ^ carry;
+ carry = a1b1;
+ }
+ *d = carry;
+}
+
+/* Compute binary polynomial xor multiply accumulate d ^= a * b */
+static void
+s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+ mp_digit a_i, a0b0, a1b1, carry = 0;
+ while (a_len--) {
+ a_i = *a++;
+ s_bmul_1x1(&a1b1, &a0b0, a_i, b);
+ *d++ ^= a0b0 ^ carry;
+ carry = a1b1;
+ }
+ *d ^= carry;
+}
+
+/* Compute binary polynomial xor multiply c = a * b.
+ * All parameters may be identical.
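+ * Schoolbook digit loop: s_bmul_d computes the first partial row, then
+ * s_bmul_d_add XORs in one shifted row per remaining digit of b; the
+ * "carry" is just the high word of each 1x1 product and never propagates.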
+ */
+mp_err
+mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pb, b_i;
+ mp_int tmp;
+ mp_size ib, a_used, b_used;
+ mp_err res = MP_OKAY;
+
+ MP_DIGITS(&tmp) = 0;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (a == c) {
+ MP_CHECKOK(mp_init_copy(&tmp, a));
+ if (a == b)
+ b = &tmp;
+ a = &tmp;
+ } else if (b == c) {
+ MP_CHECKOK(mp_init_copy(&tmp, b));
+ b = &tmp;
+ }
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b if b longer */
+ b = a;
+ a = xch;
+ }
+
+ MP_USED(c) = 1;
+ MP_DIGIT(c, 0) = 0;
+ MP_CHECKOK(s_mp_pad(c, USED(a) + USED(b)));
+
+ pb = MP_DIGITS(b);
+ s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
+
+ /* Outer loop: Digits of b */
+ a_used = MP_USED(a);
+ b_used = MP_USED(b);
+ MP_USED(c) = a_used + b_used;
+ for (ib = 1; ib < b_used; ib++) {
+ b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ if (b_i)
+ s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib);
+ else
+ MP_DIGIT(c, ib + a_used) = b_i;
+ }
+
+ s_mp_clamp(c);
+
+ SIGN(c) = ZPOS;
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+}
+
+/* Compute modular reduction of a and store result in r.
+ * r could be a.
+ * For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of int[], where f(t) is of the form:
+ * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0.
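+ * For example, the NIST K-163 polynomial f(t) = t^163 + t^7 + t^6 + t^3 + 1
+ * is represented as p[] = {163, 7, 6, 3, 0}.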
+ */
+mp_err
+mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+ int j, k;
+ int n, dN, d0, d1;
+ mp_digit zz, *z, tmp;
+ mp_size used;
+ mp_err res = MP_OKAY;
+
+ /* The algorithm does the reduction in place in r;
+ * if a != r, copy a into r first so the reduction can be done in r
+ */
+ if (a != r) {
+ MP_CHECKOK(mp_copy(a, r));
+ }
+ z = MP_DIGITS(r);
+
+ /* start reduction */
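+ /* Since t^p[0] == t^p[1] + ... + t^p[k] (mod f), every word above the
+ * modulus can be cleared by XORing shifted copies of it back in at bit
+ * distances p[0]-p[k]; the loop does this one word at a time, from the
+ * top down. */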
+ /*dN = p[0] / MP_DIGIT_BITS; */
+ dN = p[0] >> MP_DIGIT_BITS_LOG_2;
+ used = MP_USED(r);
+
+ for (j = used - 1; j > dN;) {
+
+ zz = z[j];
+ if (zz == 0) {
+ j--;
+ continue;
+ }
+ z[j] = 0;
+
+ for (k = 1; p[k] > 0; k++) {
+ /* reducing component t^p[k] */
+ n = p[0] - p[k];
+ /*d0 = n % MP_DIGIT_BITS; */
+ d0 = n & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ /*n /= MP_DIGIT_BITS; */
+ n >>= MP_DIGIT_BITS_LOG_2;
+ z[j - n] ^= (zz >> d0);
+ if (d0)
+ z[j - n - 1] ^= (zz << d1);
+ }
+
+ /* reducing component t^0 */
+ n = dN;
+ /*d0 = p[0] % MP_DIGIT_BITS;*/
+ d0 = p[0] & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ z[j - n] ^= (zz >> d0);
+ if (d0)
+ z[j - n - 1] ^= (zz << d1);
+ }
+
+ /* final round of reduction */
+ while (j == dN) {
+
+ /* d0 = p[0] % MP_DIGIT_BITS; */
+ d0 = p[0] & MP_DIGIT_BITS_MASK;
+ zz = z[dN] >> d0;
+ if (zz == 0)
+ break;
+ d1 = MP_DIGIT_BITS - d0;
+
+ /* clear up the top d1 bits */
+ if (d0) {
+ z[dN] = (z[dN] << d1) >> d1;
+ } else {
+ z[dN] = 0;
+ }
+ *z ^= zz; /* reduction t^0 component */
+
+ for (k = 1; p[k] > 0; k++) {
+ /* reducing component t^p[k]*/
+ /* n = p[k] / MP_DIGIT_BITS; */
+ n = p[k] >> MP_DIGIT_BITS_LOG_2;
+ /* d0 = p[k] % MP_DIGIT_BITS; */
+ d0 = p[k] & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ z[n] ^= (zz << d0);
+ tmp = zz >> d1;
+ if (d0 && tmp)
+ z[n + 1] ^= tmp;
+ }
+ }
+
+ s_mp_clamp(r);
+CLEANUP:
+ return res;
+}
+
+/* Compute the product of two polynomials a and b, reduced modulo p.
+ * Store the result in r. r could be a or b; a could be b.
+ */
+mp_err
+mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
+{
+ mp_err res;
+
+ if (a == b)
+ return mp_bsqrmod(a, p, r);
+ if ((res = mp_bmul(a, b, r)) != MP_OKAY)
+ return res;
+ return mp_bmod(r, p, r);
+}
+
+/* Compute binary polynomial squaring r = a*a mod p.
+ * Parameters r and a can be identical.
+ */
+
+mp_err
+mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+ mp_digit *pa, *pr, a_i;
+ mp_int tmp;
+ mp_size ia, a_used;
+ mp_err res;
+
+ ARGCHK(a != NULL && r != NULL, MP_BADARG);
+ MP_DIGITS(&tmp) = 0;
+
+ if (a == r) {
+ MP_CHECKOK(mp_init_copy(&tmp, a));
+ a = &tmp;
+ }
+
+ MP_USED(r) = 1;
+ MP_DIGIT(r, 0) = 0;
+ MP_CHECKOK(s_mp_pad(r, 2 * USED(a)));
+
+ pa = MP_DIGITS(a);
+ pr = MP_DIGITS(r);
+ a_used = MP_USED(a);
+ MP_USED(r) = 2 * a_used;
+
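+ /* Each source digit squares to exactly two output digits: gf2m_SQR0
+ * spreads the low half-digit, gf2m_SQR1 the high half-digit. */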
+ for (ia = 0; ia < a_used; ia++) {
+ a_i = *pa++;
+ *pr++ = gf2m_SQR0(a_i);
+ *pr++ = gf2m_SQR1(a_i);
+ }
+
+ MP_CHECKOK(mp_bmod(r, p, r));
+ s_mp_clamp(r);
+ SIGN(r) = ZPOS;
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+}
+
+/* Compute binary polynomial division y/x, reduced modulo pp.
+ * Store the result in r. r could be x or y, and x could equal y.
+ * Uses algorithm Modular_Division_GF(2^m) from
+ * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
+ * the Great Divide".
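+ * The loop maintains the invariants a*y == u*x and b*y == v*x (mod pp),
+ * so when a reaches 1, u holds y/x mod pp.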
+ */
+int
+mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+ const unsigned int p[], mp_int *r)
+{
+ mp_int aa, bb, uu;
+ mp_int *a, *b, *u, *v;
+ mp_err res = MP_OKAY;
+
+ MP_DIGITS(&aa) = 0;
+ MP_DIGITS(&bb) = 0;
+ MP_DIGITS(&uu) = 0;
+
+ MP_CHECKOK(mp_init_copy(&aa, x));
+ MP_CHECKOK(mp_init_copy(&uu, y));
+ MP_CHECKOK(mp_init_copy(&bb, pp));
+ MP_CHECKOK(s_mp_pad(r, USED(pp)));
+ MP_USED(r) = 1;
+ MP_DIGIT(r, 0) = 0;
+
+ a = &aa;
+ b = &bb;
+ u = &uu;
+ v = r;
+ /* reduce x and y mod p */
+ MP_CHECKOK(mp_bmod(a, p, a));
+ MP_CHECKOK(mp_bmod(u, p, u));
+
+ while (!mp_isodd(a)) {
+ s_mp_div2(a);
+ if (mp_isodd(u)) {
+ MP_CHECKOK(mp_badd(u, pp, u));
+ }
+ s_mp_div2(u);
+ }
+
+ do {
+ if (mp_cmp_mag(b, a) > 0) {
+ MP_CHECKOK(mp_badd(b, a, b));
+ MP_CHECKOK(mp_badd(v, u, v));
+ do {
+ s_mp_div2(b);
+ if (mp_isodd(v)) {
+ MP_CHECKOK(mp_badd(v, pp, v));
+ }
+ s_mp_div2(v);
+ } while (!mp_isodd(b));
+ } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1))
+ break;
+ else {
+ MP_CHECKOK(mp_badd(a, b, a));
+ MP_CHECKOK(mp_badd(u, v, u));
+ do {
+ s_mp_div2(a);
+ if (mp_isodd(u)) {
+ MP_CHECKOK(mp_badd(u, pp, u));
+ }
+ s_mp_div2(u);
+ } while (!mp_isodd(a));
+ }
+ } while (1);
+
+ MP_CHECKOK(mp_copy(u, r));
+
+CLEANUP:
+ mp_clear(&aa);
+ mp_clear(&bb);
+ mp_clear(&uu);
+ return res;
+}
+
+/* Convert the bit-string representation of a polynomial a into an array
+ * of integers corresponding to the bits with non-zero coefficient.
+ * Up to max elements of the array will be filled. The return value is the
+ * total number of coefficients that would be extracted if the array were
+ * large enough.
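+ * Coefficients are stored in descending order, e.g. a = t^163 + t^7 +
+ * t^6 + t^3 + 1 yields p[] = {163, 7, 6, 3, 0}.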
+ */
+int
+mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
+{
+ int i, j, k;
+ mp_digit top_bit, mask;
+
+ top_bit = 1;
+ top_bit <<= MP_DIGIT_BIT - 1;
+
+ for (k = 0; k < max; k++)
+ p[k] = 0;
+ k = 0;
+
+ for (i = MP_USED(a) - 1; i >= 0; i--) {
+ mask = top_bit;
+ for (j = MP_DIGIT_BIT - 1; j >= 0; j--) {
+ if (MP_DIGITS(a)[i] & mask) {
+ if (k < max)
+ p[k] = MP_DIGIT_BIT * i + j;
+ k++;
+ }
+ mask >>= 1;
+ }
+ }
+
+ return k;
+}
+
+/* Convert the coefficient array representation of a polynomial to a
+ * bit-string. The array must be terminated by 0.
+ */
+mp_err
+mp_barr2poly(const unsigned int p[], mp_int *a)
+{
+
+ mp_err res = MP_OKAY;
+ int i;
+
+ mp_zero(a);
+ for (i = 0; p[i] > 0; i++) {
+ MP_CHECKOK(mpl_set_bit(a, p[i], 1));
+ }
+ MP_CHECKOK(mpl_set_bit(a, 0, 1));
+
+CLEANUP:
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h
new file mode 100644
index 0000000000..ed2c85493c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.h
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_H_
+#define _MP_GF2M_H_
+
+#include "mpi.h"
+
+mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c);
+
+/* For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of int[], where f(t) is of the form:
+ * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0.
+ */
+mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r);
+mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[],
+ mp_int *r);
+mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r);
+mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+ const unsigned int p[], mp_int *r);
+
+int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max);
+mp_err mp_barr2poly(const unsigned int p[], mp_int *a);
+
+#endif /* _MP_GF2M_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c
new file mode 100644
index 0000000000..ddc21ec1cb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache.c
@@ -0,0 +1,788 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "prtypes.h"
+
+/*
+ * This file implements a single function, s_mpi_getProcessorLineSize().
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * Currently the file returns good data for most modern x86 processors, and
+ * reasonable data on 64-bit ppc processors. All other processors are assumed
+ * to have a cache line size of 32 bytes.
+ *
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "cpuid\n\t"
+ : "=a"(*eax),
+ "=b"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ int intrinsic_out[4];
+
+ __cpuid(intrinsic_out, op);
+ *eax = intrinsic_out[0];
+ *ebx = intrinsic_out[1];
+ *ecx = intrinsic_out[2];
+ *edx = intrinsic_out[3];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ /* Some older processors don't fill the ecx register with cpuid, so clobber it
+ * before calling cpuid, so that there's no risk of picking random bits that
+ * erroneously indicate that absent CPU features are present.
+ * Also, GCC isn't smart enough to save the ebx PIC register on its own
+ * in this case, so do it by hand. Use edi to store ebx and pass the
+ * value returned in ebx from cpuid through edi. */
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "mov %%ebx,%%edi\n\t"
+ "cpuid\n\t"
+ "xchgl %%ebx,%%edi\n\t"
+ : "=a"(*eax),
+ "=D"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+/*
+ * try flipping a processor flag to determine CPU type
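+ * (EFLAGS bits that a processor generation does not implement read back
+ * unchanged, so a zero return identifies the older part; see is386/is486.)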
+ */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm__("pushfl\n\t" /* get the flags */
+ "popl %0\n\t"
+ "movl %0,%1\n\t" /* save the original flags */
+ "xorl %2,%0\n\t" /* flip the bit */
+ "pushl %0\n\t" /* set the flags */
+ "popfl\n\t"
+ "pushfl\n\t" /* get the flags again (for return) */
+ "popl %0\n\t"
+ "pushl %1\n\t" /* restore the original flags */
+ "popfl\n\t"
+ : "=r"(changedFlags),
+ "=r"(originalFlags),
+ "=r"(flag)
+ : "2"(flag));
+ return changedFlags ^ originalFlags;
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler
+ */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+void
+freebl_cpuid(unsigned long op, unsigned long *Reax,
+ unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+ unsigned long Leax, Lebx, Lecx, Ledx;
+ __asm {
+ pushad
+ xor ecx,ecx
+ mov eax,op
+ wcpuid
+ mov Leax,eax
+ mov Lebx,ebx
+ mov Lecx,ecx
+ mov Ledx,edx
+ popad
+ }
+ *Reax = Leax;
+ *Rebx = Lebx;
+ *Recx = Lecx;
+ *Redx = Ledx;
+}
+
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm {
+ push eax
+ push ebx
+ pushfd /* get the flags */
+ pop eax
+ push eax /* save the flags on the stack */
+ mov originalFlags,eax /* save the original flags */
+ mov ebx,flag
+ xor eax,ebx /* flip the bit */
+ push eax /* set the flags */
+ popfd
+ pushfd /* get the flags again (for return) */
+ pop eax
+ popfd /* restore the original flags */
+ mov changedFlags,eax
+ pop ebx
+ pop eax
+ }
+ return changedFlags ^ originalFlags;
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
+static int
+is386()
+{
+ return changeFlag(AC_FLAG) == 0;
+}
+
+/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
+static int
+is486()
+{
+ return changeFlag(ID_FLAG) == 0;
+}
+#endif
+
+/*
+ * table for Intel Cache.
+ * See Intel Application Note AP-485 for more information
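+ * Each descriptor byte returned by cpuid(2) indexes this 256-entry map
+ * to a cache type and line size; entries with line size 0 are ignored.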
+ */
+
+typedef unsigned char CacheTypeEntry;
+
+typedef enum {
+ Cache_NONE = 0,
+ Cache_UNKNOWN = 1,
+ Cache_TLB = 2,
+ Cache_TLBi = 3,
+ Cache_TLBd = 4,
+ Cache_Trace = 5,
+ Cache_L1 = 6,
+ Cache_L1i = 7,
+ Cache_L1d = 8,
+ Cache_L2 = 9,
+ Cache_L2i = 10,
+ Cache_L2d = 11,
+ Cache_L3 = 12,
+ Cache_L3i = 13,
+ Cache_L3d = 14
+} CacheType;
+
+struct _cache {
+ CacheTypeEntry type;
+ unsigned char lineSize;
+};
+static const struct _cache CacheMap[256] = {
+ /* 00 */ { Cache_NONE, 0 },
+ /* 01 */ { Cache_TLBi, 0 },
+ /* 02 */ { Cache_TLBi, 0 },
+ /* 03 */ { Cache_TLBd, 0 },
+ /* 04 */ {
+ Cache_TLBd,
+ },
+ /* 05 */ { Cache_UNKNOWN, 0 },
+ /* 06 */ { Cache_L1i, 32 },
+ /* 07 */ { Cache_UNKNOWN, 0 },
+ /* 08 */ { Cache_L1i, 32 },
+ /* 09 */ { Cache_UNKNOWN, 0 },
+ /* 0a */ { Cache_L1d, 32 },
+ /* 0b */ { Cache_UNKNOWN, 0 },
+ /* 0c */ { Cache_L1d, 32 },
+ /* 0d */ { Cache_UNKNOWN, 0 },
+ /* 0e */ { Cache_UNKNOWN, 0 },
+ /* 0f */ { Cache_UNKNOWN, 0 },
+ /* 10 */ { Cache_UNKNOWN, 0 },
+ /* 11 */ { Cache_UNKNOWN, 0 },
+ /* 12 */ { Cache_UNKNOWN, 0 },
+ /* 13 */ { Cache_UNKNOWN, 0 },
+ /* 14 */ { Cache_UNKNOWN, 0 },
+ /* 15 */ { Cache_UNKNOWN, 0 },
+ /* 16 */ { Cache_UNKNOWN, 0 },
+ /* 17 */ { Cache_UNKNOWN, 0 },
+ /* 18 */ { Cache_UNKNOWN, 0 },
+ /* 19 */ { Cache_UNKNOWN, 0 },
+ /* 1a */ { Cache_UNKNOWN, 0 },
+ /* 1b */ { Cache_UNKNOWN, 0 },
+ /* 1c */ { Cache_UNKNOWN, 0 },
+ /* 1d */ { Cache_UNKNOWN, 0 },
+ /* 1e */ { Cache_UNKNOWN, 0 },
+ /* 1f */ { Cache_UNKNOWN, 0 },
+ /* 20 */ { Cache_UNKNOWN, 0 },
+ /* 21 */ { Cache_UNKNOWN, 0 },
+ /* 22 */ { Cache_L3, 64 },
+ /* 23 */ { Cache_L3, 64 },
+ /* 24 */ { Cache_UNKNOWN, 0 },
+ /* 25 */ { Cache_L3, 64 },
+ /* 26 */ { Cache_UNKNOWN, 0 },
+ /* 27 */ { Cache_UNKNOWN, 0 },
+ /* 28 */ { Cache_UNKNOWN, 0 },
+ /* 29 */ { Cache_L3, 64 },
+ /* 2a */ { Cache_UNKNOWN, 0 },
+ /* 2b */ { Cache_UNKNOWN, 0 },
+ /* 2c */ { Cache_L1d, 64 },
+ /* 2d */ { Cache_UNKNOWN, 0 },
+ /* 2e */ { Cache_UNKNOWN, 0 },
+ /* 2f */ { Cache_UNKNOWN, 0 },
+ /* 30 */ { Cache_L1i, 64 },
+ /* 31 */ { Cache_UNKNOWN, 0 },
+ /* 32 */ { Cache_UNKNOWN, 0 },
+ /* 33 */ { Cache_UNKNOWN, 0 },
+ /* 34 */ { Cache_UNKNOWN, 0 },
+ /* 35 */ { Cache_UNKNOWN, 0 },
+ /* 36 */ { Cache_UNKNOWN, 0 },
+ /* 37 */ { Cache_UNKNOWN, 0 },
+ /* 38 */ { Cache_UNKNOWN, 0 },
+ /* 39 */ { Cache_L2, 64 },
+ /* 3a */ { Cache_UNKNOWN, 0 },
+ /* 3b */ { Cache_L2, 64 },
+ /* 3c */ { Cache_L2, 64 },
+ /* 3d */ { Cache_UNKNOWN, 0 },
+ /* 3e */ { Cache_UNKNOWN, 0 },
+ /* 3f */ { Cache_UNKNOWN, 0 },
+ /* 40 */ { Cache_L2, 0 },
+ /* 41 */ { Cache_L2, 32 },
+ /* 42 */ { Cache_L2, 32 },
+ /* 43 */ { Cache_L2, 32 },
+ /* 44 */ { Cache_L2, 32 },
+ /* 45 */ { Cache_L2, 32 },
+ /* 46 */ { Cache_UNKNOWN, 0 },
+ /* 47 */ { Cache_UNKNOWN, 0 },
+ /* 48 */ { Cache_UNKNOWN, 0 },
+ /* 49 */ { Cache_UNKNOWN, 0 },
+ /* 4a */ { Cache_UNKNOWN, 0 },
+ /* 4b */ { Cache_UNKNOWN, 0 },
+ /* 4c */ { Cache_UNKNOWN, 0 },
+ /* 4d */ { Cache_UNKNOWN, 0 },
+ /* 4e */ { Cache_UNKNOWN, 0 },
+ /* 4f */ { Cache_UNKNOWN, 0 },
+ /* 50 */ { Cache_TLBi, 0 },
+ /* 51 */ { Cache_TLBi, 0 },
+ /* 52 */ { Cache_TLBi, 0 },
+ /* 53 */ { Cache_UNKNOWN, 0 },
+ /* 54 */ { Cache_UNKNOWN, 0 },
+ /* 55 */ { Cache_UNKNOWN, 0 },
+ /* 56 */ { Cache_UNKNOWN, 0 },
+ /* 57 */ { Cache_UNKNOWN, 0 },
+ /* 58 */ { Cache_UNKNOWN, 0 },
+ /* 59 */ { Cache_UNKNOWN, 0 },
+ /* 5a */ { Cache_UNKNOWN, 0 },
+ /* 5b */ { Cache_TLBd, 0 },
+ /* 5c */ { Cache_TLBd, 0 },
+ /* 5d */ { Cache_TLBd, 0 },
+ /* 5e */ { Cache_UNKNOWN, 0 },
+ /* 5f */ { Cache_UNKNOWN, 0 },
+ /* 60 */ { Cache_UNKNOWN, 0 },
+ /* 61 */ { Cache_UNKNOWN, 0 },
+ /* 62 */ { Cache_UNKNOWN, 0 },
+ /* 63 */ { Cache_UNKNOWN, 0 },
+ /* 64 */ { Cache_UNKNOWN, 0 },
+ /* 65 */ { Cache_UNKNOWN, 0 },
+ /* 66 */ { Cache_L1d, 64 },
+ /* 67 */ { Cache_L1d, 64 },
+ /* 68 */ { Cache_L1d, 64 },
+ /* 69 */ { Cache_UNKNOWN, 0 },
+ /* 6a */ { Cache_UNKNOWN, 0 },
+ /* 6b */ { Cache_UNKNOWN, 0 },
+ /* 6c */ { Cache_UNKNOWN, 0 },
+ /* 6d */ { Cache_UNKNOWN, 0 },
+ /* 6e */ { Cache_UNKNOWN, 0 },
+ /* 6f */ { Cache_UNKNOWN, 0 },
+ /* 70 */ { Cache_Trace, 1 },
+ /* 71 */ { Cache_Trace, 1 },
+ /* 72 */ { Cache_Trace, 1 },
+ /* 73 */ { Cache_UNKNOWN, 0 },
+ /* 74 */ { Cache_UNKNOWN, 0 },
+ /* 75 */ { Cache_UNKNOWN, 0 },
+ /* 76 */ { Cache_UNKNOWN, 0 },
+ /* 77 */ { Cache_UNKNOWN, 0 },
+ /* 78 */ { Cache_UNKNOWN, 0 },
+ /* 79 */ { Cache_L2, 64 },
+ /* 7a */ { Cache_L2, 64 },
+ /* 7b */ { Cache_L2, 64 },
+ /* 7c */ { Cache_L2, 64 },
+ /* 7d */ { Cache_UNKNOWN, 0 },
+ /* 7e */ { Cache_UNKNOWN, 0 },
+ /* 7f */ { Cache_UNKNOWN, 0 },
+ /* 80 */ { Cache_UNKNOWN, 0 },
+ /* 81 */ { Cache_UNKNOWN, 0 },
+ /* 82 */ { Cache_L2, 32 },
+ /* 83 */ { Cache_L2, 32 },
+ /* 84 */ { Cache_L2, 32 },
+ /* 85 */ { Cache_L2, 32 },
+ /* 86 */ { Cache_L2, 64 },
+ /* 87 */ { Cache_L2, 64 },
+ /* 88 */ { Cache_UNKNOWN, 0 },
+ /* 89 */ { Cache_UNKNOWN, 0 },
+ /* 8a */ { Cache_UNKNOWN, 0 },
+ /* 8b */ { Cache_UNKNOWN, 0 },
+ /* 8c */ { Cache_UNKNOWN, 0 },
+ /* 8d */ { Cache_UNKNOWN, 0 },
+ /* 8e */ { Cache_UNKNOWN, 0 },
+ /* 8f */ { Cache_UNKNOWN, 0 },
+ /* 90 */ { Cache_UNKNOWN, 0 },
+ /* 91 */ { Cache_UNKNOWN, 0 },
+ /* 92 */ { Cache_UNKNOWN, 0 },
+ /* 93 */ { Cache_UNKNOWN, 0 },
+ /* 94 */ { Cache_UNKNOWN, 0 },
+ /* 95 */ { Cache_UNKNOWN, 0 },
+ /* 96 */ { Cache_UNKNOWN, 0 },
+ /* 97 */ { Cache_UNKNOWN, 0 },
+ /* 98 */ { Cache_UNKNOWN, 0 },
+ /* 99 */ { Cache_UNKNOWN, 0 },
+ /* 9a */ { Cache_UNKNOWN, 0 },
+ /* 9b */ { Cache_UNKNOWN, 0 },
+ /* 9c */ { Cache_UNKNOWN, 0 },
+ /* 9d */ { Cache_UNKNOWN, 0 },
+ /* 9e */ { Cache_UNKNOWN, 0 },
+ /* 9f */ { Cache_UNKNOWN, 0 },
+ /* a0 */ { Cache_UNKNOWN, 0 },
+ /* a1 */ { Cache_UNKNOWN, 0 },
+ /* a2 */ { Cache_UNKNOWN, 0 },
+ /* a3 */ { Cache_UNKNOWN, 0 },
+ /* a4 */ { Cache_UNKNOWN, 0 },
+ /* a5 */ { Cache_UNKNOWN, 0 },
+ /* a6 */ { Cache_UNKNOWN, 0 },
+ /* a7 */ { Cache_UNKNOWN, 0 },
+ /* a8 */ { Cache_UNKNOWN, 0 },
+ /* a9 */ { Cache_UNKNOWN, 0 },
+ /* aa */ { Cache_UNKNOWN, 0 },
+ /* ab */ { Cache_UNKNOWN, 0 },
+ /* ac */ { Cache_UNKNOWN, 0 },
+ /* ad */ { Cache_UNKNOWN, 0 },
+ /* ae */ { Cache_UNKNOWN, 0 },
+ /* af */ { Cache_UNKNOWN, 0 },
+ /* b0 */ { Cache_TLBi, 0 },
+ /* b1 */ { Cache_UNKNOWN, 0 },
+ /* b2 */ { Cache_UNKNOWN, 0 },
+ /* b3 */ { Cache_TLBd, 0 },
+ /* b4 */ { Cache_UNKNOWN, 0 },
+ /* b5 */ { Cache_UNKNOWN, 0 },
+ /* b6 */ { Cache_UNKNOWN, 0 },
+ /* b7 */ { Cache_UNKNOWN, 0 },
+ /* b8 */ { Cache_UNKNOWN, 0 },
+ /* b9 */ { Cache_UNKNOWN, 0 },
+ /* ba */ { Cache_UNKNOWN, 0 },
+ /* bb */ { Cache_UNKNOWN, 0 },
+ /* bc */ { Cache_UNKNOWN, 0 },
+ /* bd */ { Cache_UNKNOWN, 0 },
+ /* be */ { Cache_UNKNOWN, 0 },
+ /* bf */ { Cache_UNKNOWN, 0 },
+ /* c0 */ { Cache_UNKNOWN, 0 },
+ /* c1 */ { Cache_UNKNOWN, 0 },
+ /* c2 */ { Cache_UNKNOWN, 0 },
+ /* c3 */ { Cache_UNKNOWN, 0 },
+ /* c4 */ { Cache_UNKNOWN, 0 },
+ /* c5 */ { Cache_UNKNOWN, 0 },
+ /* c6 */ { Cache_UNKNOWN, 0 },
+ /* c7 */ { Cache_UNKNOWN, 0 },
+ /* c8 */ { Cache_UNKNOWN, 0 },
+ /* c9 */ { Cache_UNKNOWN, 0 },
+ /* ca */ { Cache_UNKNOWN, 0 },
+ /* cb */ { Cache_UNKNOWN, 0 },
+ /* cc */ { Cache_UNKNOWN, 0 },
+ /* cd */ { Cache_UNKNOWN, 0 },
+ /* ce */ { Cache_UNKNOWN, 0 },
+ /* cf */ { Cache_UNKNOWN, 0 },
+ /* d0 */ { Cache_UNKNOWN, 0 },
+ /* d1 */ { Cache_UNKNOWN, 0 },
+ /* d2 */ { Cache_UNKNOWN, 0 },
+ /* d3 */ { Cache_UNKNOWN, 0 },
+ /* d4 */ { Cache_UNKNOWN, 0 },
+ /* d5 */ { Cache_UNKNOWN, 0 },
+ /* d6 */ { Cache_UNKNOWN, 0 },
+ /* d7 */ { Cache_UNKNOWN, 0 },
+ /* d8 */ { Cache_UNKNOWN, 0 },
+ /* d9 */ { Cache_UNKNOWN, 0 },
+ /* da */ { Cache_UNKNOWN, 0 },
+ /* db */ { Cache_UNKNOWN, 0 },
+ /* dc */ { Cache_UNKNOWN, 0 },
+ /* dd */ { Cache_UNKNOWN, 0 },
+ /* de */ { Cache_UNKNOWN, 0 },
+ /* df */ { Cache_UNKNOWN, 0 },
+ /* e0 */ { Cache_UNKNOWN, 0 },
+ /* e1 */ { Cache_UNKNOWN, 0 },
+ /* e2 */ { Cache_UNKNOWN, 0 },
+ /* e3 */ { Cache_UNKNOWN, 0 },
+ /* e4 */ { Cache_UNKNOWN, 0 },
+ /* e5 */ { Cache_UNKNOWN, 0 },
+ /* e6 */ { Cache_UNKNOWN, 0 },
+ /* e7 */ { Cache_UNKNOWN, 0 },
+ /* e8 */ { Cache_UNKNOWN, 0 },
+ /* e9 */ { Cache_UNKNOWN, 0 },
+ /* ea */ { Cache_UNKNOWN, 0 },
+ /* eb */ { Cache_UNKNOWN, 0 },
+ /* ec */ { Cache_UNKNOWN, 0 },
+ /* ed */ { Cache_UNKNOWN, 0 },
+ /* ee */ { Cache_UNKNOWN, 0 },
+ /* ef */ { Cache_UNKNOWN, 0 },
+ /* f0 */ { Cache_UNKNOWN, 0 },
+ /* f1 */ { Cache_UNKNOWN, 0 },
+ /* f2 */ { Cache_UNKNOWN, 0 },
+ /* f3 */ { Cache_UNKNOWN, 0 },
+ /* f4 */ { Cache_UNKNOWN, 0 },
+ /* f5 */ { Cache_UNKNOWN, 0 },
+ /* f6 */ { Cache_UNKNOWN, 0 },
+ /* f7 */ { Cache_UNKNOWN, 0 },
+ /* f8 */ { Cache_UNKNOWN, 0 },
+ /* f9 */ { Cache_UNKNOWN, 0 },
+ /* fa */ { Cache_UNKNOWN, 0 },
+ /* fb */ { Cache_UNKNOWN, 0 },
+ /* fc */ { Cache_UNKNOWN, 0 },
+ /* fd */ { Cache_UNKNOWN, 0 },
+ /* fe */ { Cache_UNKNOWN, 0 },
+ /* ff */ { Cache_UNKNOWN, 0 }
+};
+
+/*
+ * use the above table to determine the CacheEntryLineSize.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level,
+ unsigned long *lineSize)
+{
+ CacheType type;
+
+ type = CacheMap[val].type;
+ /* only interested in data caches */
+ /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
+ * this line-size check has the side effect of rejecting that entry. If
+ * that weren't the case, we would have to reject it explicitly */
+ if (CacheMap[val].lineSize == 0) {
+ return;
+ }
+ /* look at the caches, skip types we aren't interested in.
+ * if we already have a value for a lower level cache, skip the
+ * current entry */
+ if ((type == Cache_L1) || (type == Cache_L1d)) {
+ *level = 1;
+ *lineSize = CacheMap[val].lineSize;
+ } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
+ *level = 2;
+ *lineSize = CacheMap[val].lineSize;
+ } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
+ *level = 3;
+ *lineSize = CacheMap[val].lineSize;
+ }
+ return;
+}
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+ int *level, unsigned long *lineSize)
+{
+ getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
+ getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
+ getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
+ getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if cache
+ * information is not supported by this processor
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+ int level = 4;
+ unsigned long lineSize = 0;
+ unsigned long eax, ebx, ecx, edx;
+ int repeat, count;
+
+ if (cpuidLevel < 2) {
+ return 0;
+ }
+
+    /* Function '2' of cpuid is Intel's cache-info call. Each byte of the
+     * four result registers may contain a descriptor for one of the
+     * processor's caches; the CacheMap table maps each descriptor to the
+     * cache it describes. Register 'al' contains the number of times
+     * cpuid '2' must be called in order to find all the cache
+     * descriptors. Only registers with the high bit set to 'zero' hold
+     * valid descriptors. This code loops through all the required calls
+     * to cpuid '2' and passes any valid descriptors it finds to the
+     * getIntelRegisterCacheLineSize code, which breaks the registers
+     * down into their component descriptors. In the end the lineSize of
+     * the lowest-level data cache is returned. */
+ freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+ repeat = eax & 0xf;
+ for (count = 0; count < repeat; count++) {
+ if ((eax & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
+ }
+ if ((ebx & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
+ }
+ if ((ecx & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
+ }
+ if ((edx & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(edx, &level, &lineSize);
+ }
+ if (count + 1 != repeat) {
+ freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+ }
+ }
+ return lineSize;
+}
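
To make the descriptor plumbing above concrete, here is a small, self-contained sketch of the same byte-by-byte decoding, with a made-up register value and a toy one-entry stand-in for the 256-entry CacheMap:

    #include <stdio.h>

    /* Toy stand-in for CacheMap: only descriptor 0x66 is "known" here,
     * and we pretend it names an L1 data cache with 64-byte lines. */
    static unsigned long
    toy_lookup(unsigned char desc)
    {
        return (desc == 0x66) ? 64 : 0;
    }

    int
    main(void)
    {
        /* Made-up cpuid(2) register value; the high bit is clear, so
         * all four bytes are descriptor candidates. */
        unsigned long reg = 0x00660a40;
        int shift;

        for (shift = 24; shift >= 0; shift -= 8) {
            unsigned char desc = (reg >> shift) & 0xff;
            unsigned long lineSize = toy_lookup(desc);
            if (lineSize != 0)
                printf("descriptor 0x%02x -> %lu-byte lines\n",
                       desc, lineSize);
        }
        return 0; /* prints one line, for the 0x66 byte */
    }
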
+
+/*
+ * returns '0' if the cache info is not supported by this processor.
+ * This is based on the AMD extended cache commands for cpuid.
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+ unsigned long lineSize = 0;
+ unsigned long eax, ebx, ecx, edx;
+
+ /* get the Extended CPUID level */
+ freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+ cpuidLevel = eax;
+
+ if (cpuidLevel >= 0x80000005) {
+ freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+ lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
+ }
+ return lineSize;
+}
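
For reference, cpuid function 0x80000005 on processors implementing the AMD scheme packs the entire L1 data-cache geometry into ecx; the function above keeps only the low byte. Below is a hedged sketch of the full field layout per the AMD application note cited above; treat the breakdown as illustrative:

    #include <stdio.h>

    /* Unpack cpuid(0x80000005) ecx, the L1 data-cache descriptor on
     * AMD-style CPUs: [31:24] size in KB, [23:16] associativity,
     * [15:8] lines per tag, [7:0] line size in bytes. */
    static void
    unpackAmdL1d(unsigned long ecx, unsigned long *sizeKB,
                 unsigned long *assoc, unsigned long *lineSize)
    {
        *sizeKB = (ecx >> 24) & 0xff;
        *assoc = (ecx >> 16) & 0xff; /* 0xff means fully associative */
        *lineSize = ecx & 0xff;      /* the byte kept by the code above */
    }

    int
    main(void)
    {
        unsigned long sizeKB, assoc, lineSize;

        /* made-up ecx value: 64 KB, 2-way, 64-byte lines */
        unpackAmdL1d(0x40020040UL, &sizeKB, &assoc, &lineSize);
        printf("%luKB, %lu-way, %lu-byte lines\n", sizeKB, assoc, lineSize);
        return 0;
    }
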
+
+static const char *const manMap[] = {
+#define INTEL 0
+ "GenuineIntel",
+#define AMD 1
+ "AuthenticAMD",
+#define CYRIX 2
+ "CyrixInstead",
+#define CENTAUR 3
+    "CentaurHauls",
+#define NEXGEN 4
+    "NexGenDriven",
+#define TRANSMETA 5
+    "GenuineTMx86",
+#define RISE 6
+    "RiseRiseRise",
+#define UMC 7
+    "UMC UMC UMC ",
+#define SIS 8
+    "Sis Sis Sis ",
+#define NATIONAL 9
+    "Geode by NSC",
+};
+
+static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
+
+#define MAN_UNKNOWN 10
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1 << 26)
+unsigned long
+s_mpi_is_sse2()
+{
+ unsigned long eax, ebx, ecx, edx;
+
+ if (is386() || is486()) {
+ return 0;
+ }
+ freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+
+    /* cpuid function 1 is unsupported, so there are no SSE2 extensions */
+ if (eax == 0) {
+ return 0;
+ }
+
+ freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+ return (edx & SSE2_FLAG) == SSE2_FLAG;
+}
+#endif
+
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ unsigned long eax, ebx, ecx, edx;
+ PRUint32 cpuid[3];
+ unsigned long cpuidLevel;
+ unsigned long cacheLineSize = 0;
+ int manufacturer = MAN_UNKNOWN;
+ int i;
+ char string[13];
+
+#if !defined(AMD_64)
+ if (is386()) {
+ return 0; /* 386 had no cache */
+ }
+ if (is486()) {
+ return 32; /* really? need more info */
+ }
+#endif
+
+ /* Pentium, cpuid command is available */
+ freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+ cpuidLevel = eax;
+    /* string holds the CPU's manufacturer ID string - a twelve
+     * character ASCII string stored in ebx, edx, ecx (in that order);
+     * the 32-bit extended feature flags, from cpuid function 1, are
+     * in edx, ecx.
+     */
+    cpuid[0] = ebx;
+    cpuid[1] = edx;
+    cpuid[2] = ecx;
+ memcpy(string, cpuid, sizeof(cpuid));
+ string[12] = 0;
+
+ manufacturer = MAN_UNKNOWN;
+ for (i = 0; i < n_manufacturers; i++) {
+ if (strcmp(manMap[i], string) == 0) {
+ manufacturer = i;
+ }
+ }
+
+ if (manufacturer == INTEL) {
+ cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+ } else {
+ cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+ }
+    /* The processor doesn't support cache info through cpuid. This
+     * means an old pentium-class processor, which has 32-byte cache
+     * lines. If we learn differently, we can use a switch based on
+     * the manufacturer id. */
+ if (cacheLineSize == 0) {
+ cacheLineSize = 32;
+ }
+ return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
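
The line size computed here ultimately feeds the side-channel hardening in the exponentiation code: if a table of precomputed values is read so that every lookup touches the same cache lines regardless of the secret index, the cache leaks nothing about the index. The sketch below shows that idea in its simplest form; it is illustrative only and is not the actual mp_modexp code, which interleaves table entries at line-size granularity instead:

    #include <stddef.h>
    #include <string.h>

    /* Read entry 'idx' from a table of nEntries entries of entrySize
     * bytes, touching every entry so the memory access pattern is
     * independent of idx. */
    static void
    cache_neutral_read(unsigned char *out, const unsigned char *table,
                       size_t nEntries, size_t entrySize, size_t idx)
    {
        size_t i, j;

        memset(out, 0, entrySize);
        for (i = 0; i < nEntries; i++) {
            /* mask is 0xff only when i == idx; no data-dependent branch */
            unsigned char mask =
                (unsigned char)(0 - (unsigned char)(i == idx));
            for (j = 0; j < entrySize; j++)
                out[j] |= table[i * entrySize + j] & mask;
        }
    }
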
+
+#if defined(__ppc64__)
+/*
+ * Sigh, the PPC has some really nice features to help us determine cache
+ * size, since it has lots of direct cache-control instructions. The POWER
+ * processor even had an instruction to report the line size, but it was
+ * dropped in PowerPC. Unfortunately most of these facilities are not
+ * available in user mode.
+ *
+ * The dcbz instruction would be a great way to determine the cache line size,
+ * except 1) it only works on write-back memory (it throws an exception
+ * otherwise), and 2) because so many mac programs 'knew' the processor cache
+ * line was 32 bytes, they used this instruction as a fast 'zero 32 bytes'.
+ * The newer G5 processor has 128-byte cache lines, but dcbz only clears 32
+ * bytes to keep those programs happy. dcbzl works if 64-bit instructions are
+ * supported. If you know 64-bit instructions are supported, and that the
+ * stack is write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array' */
+static inline void
+dcbzl(char *array)
+{
+ __asm__("dcbzl %0, %1"
+ : /*no result*/
+ : "b%"(array), "r"(0)
+ : "memory");
+}
+
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ char testArray[2 * PPC_MAX_LINE_SIZE + 1];
+ char *test;
+ int i;
+
+ /* align the array on a maximum line size boundary, so we
+ * know we are starting to clear from the first address */
+ test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
+ /* set all the values to 1's */
+ memset(test, 0xff, PPC_MAX_LINE_SIZE);
+ /* clear one cache block starting at 'test' */
+ dcbzl(test);
+
+ /* find the size of the cleared area, that's our block size */
+ for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) {
+ if (test[i - 1] == 0) {
+ return i;
+ }
+ }
+ return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
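
A quick sanity check on the PPC_DO_ALIGN arithmetic above, which rounds a pointer up to a power-of-two boundary by adding y-1 and masking; the addresses are made up:

    #include <assert.h>

    #define PPC_DO_ALIGN(x, y) \
        ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))

    int
    main(void)
    {
        /* 0x1003 rounds up to the next 256-byte boundary, 0x1100;
         * an already-aligned address is unchanged. */
        assert(PPC_DO_ALIGN(0x1003, 256) == (char *)0x1100);
        assert(PPC_DO_ALIGN(0x1100, 256) == (char *)0x1100);
        return 0;
    }
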
+
+/*
+ * Put other processor- and platform-specific cache code here; it should
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call for this, this would be
+ * a great place to use it.
+ *
+ * If there is no cache, return 0.
+ *
+ * Define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ *
+ */
+
+/* If no way to get the processor cache line size has been defined, assume
+ * it's 32 bytes (most common value, does not significantly impact performance)
+ */
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ return 32;
+}
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
new file mode 100644
index 0000000000..d493b4762f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
@@ -0,0 +1,861 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "mpcpucache.c"
+/ .section .rodata.str1.1,"aMS",@progbits,1
+ .section .rodata
+.LC0:
+ .string "GenuineIntel"
+.LC1:
+ .string "AuthenticAMD"
+.LC2:
+ .string "CyrixInstead"
+.LC3:
+ .string "CentaurHauls"
+.LC4:
+ .string "NexGenDriven"
+.LC5:
+ .string "GenuineTMx86"
+.LC6:
+ .string "RiseRiseRise"
+.LC7:
+ .string "UMC UMC UMC "
+.LC8:
+ .string "Sis Sis Sis "
+.LC9:
+ .string "Geode by NSC"
+ .section .data.rel.ro.local,"aw",@progbits
+ .align 32
+ .type manMap, @object
+ .size manMap, 80
+manMap:
+ .quad .LC0
+ .quad .LC1
+ .quad .LC2
+ .quad .LC3
+ .quad .LC4
+ .quad .LC5
+ .quad .LC6
+ .quad .LC7
+ .quad .LC8
+ .quad .LC9
+ .section .rodata
+ .align 32
+ .type CacheMap, @object
+ .size CacheMap, 512
+CacheMap:
+ .byte 0
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .zero 1
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .text
+ .align 16
+.globl freebl_cpuid
+ .type freebl_cpuid, @function
+freebl_cpuid:
+.LFB2:
+ movq %rdx, %r10
+ pushq %rbx
+.LCFI0:
+ movq %rcx, %r11
+ movq %rdi, %rax
+/APP
+ cpuid
+
+/NO_APP
+ movq %rax, (%rsi)
+ movq %rbx, (%r10)
+ popq %rbx
+ movq %rcx, (%r11)
+ movq %rdx, (%r8)
+ ret
+.LFE2:
+ .size freebl_cpuid, .-freebl_cpuid
+ .align 16
+ .type getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:
+.LFB3:
+ leaq CacheMap(%rip), %r9
+ movq %rdx, %r10
+ movzbl 1(%r9,%rdi,2), %ecx
+ movzbl (%r9,%rdi,2), %r8d
+ testb %cl, %cl
+ je .L2
+ cmpl $6, %r8d
+ sete %dl
+ cmpl $8, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L4
+ movl $1, (%rsi)
+.L9:
+ movzbl %cl, %eax
+ movq %rax, (%r10)
+ ret
+ .align 16
+.L4:
+ movl (%rsi), %r11d
+ cmpl $1, %r11d
+ jg .L11
+.L6:
+ cmpl $2, %r11d
+ jle .L2
+ cmpl $12, %r8d
+ sete %dl
+ cmpl $14, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L2
+ movzbq 1(%r9,%rdi,2), %rax
+ movl $3, (%rsi)
+ movq %rax, (%r10)
+ .align 16
+.L2:
+ rep ; ret
+ .align 16
+.L11:
+ cmpl $9, %r8d
+ sete %dl
+ cmpl $11, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L6
+ movl $2, (%rsi)
+ jmp .L9
+.LFE3:
+ .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+ .align 16
+ .type getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:
+.LFB4:
+ pushq %rbp
+.LCFI1:
+ movq %rsp, %rbp
+.LCFI2:
+ movq %rbx, -24(%rbp)
+.LCFI3:
+ movq %rdi, %rbx
+ shrq $24, %rdi
+ movq %r12, -16(%rbp)
+.LCFI4:
+ movq %r13, -8(%rbp)
+.LCFI5:
+ andl $255, %edi
+ subq $24, %rsp
+.LCFI6:
+ movq %rsi, %r13
+ movq %rdx, %r12
+ call getIntelCacheEntryLineSize
+ movq %rbx, %rdi
+ movq %r12, %rdx
+ movq %r13, %rsi
+ shrq $16, %rdi
+ andl $255, %edi
+ call getIntelCacheEntryLineSize
+ movq %rbx, %rdi
+ movq %r12, %rdx
+ movq %r13, %rsi
+ shrq $8, %rdi
+ andl $255, %ebx
+ andl $255, %edi
+ call getIntelCacheEntryLineSize
+ movq %r12, %rdx
+ movq %r13, %rsi
+ movq %rbx, %rdi
+ movq 8(%rsp), %r12
+ movq (%rsp), %rbx
+ movq 16(%rsp), %r13
+ leave
+ jmp getIntelCacheEntryLineSize
+.LFE4:
+ .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+ .align 16
+.globl s_mpi_getProcessorLineSize
+ .type s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:
+.LFB7:
+ pushq %rbp
+.LCFI7:
+ xorl %edi, %edi
+ movq %rsp, %rbp
+.LCFI8:
+ pushq %r15
+.LCFI9:
+ leaq -136(%rbp), %r8
+ leaq -144(%rbp), %rcx
+ leaq -152(%rbp), %rdx
+ pushq %r14
+.LCFI10:
+ leaq -160(%rbp), %rsi
+ leaq -128(%rbp), %r14
+ pushq %r13
+.LCFI11:
+ leaq manMap(%rip), %r13
+ pushq %r12
+.LCFI12:
+ movl $9, %r12d
+ pushq %rbx
+.LCFI13:
+ xorl %ebx, %ebx
+ subq $200, %rsp
+.LCFI14:
+ call freebl_cpuid
+ movq -152(%rbp), %rax
+ movq -160(%rbp), %r15
+ movb $0, -116(%rbp)
+ movl %eax, -128(%rbp)
+ movq -136(%rbp), %rax
+ movl %eax, -124(%rbp)
+ movq -144(%rbp), %rax
+ movl %eax, -120(%rbp)
+ .align 16
+.L18:
+ movslq %ebx,%rax
+ movq %r14, %rsi
+ movq (%r13,%rax,8), %rdi
+ call strcmp@PLT
+ testl %eax, %eax
+ cmove %ebx, %r12d
+ incl %ebx
+ cmpl $9, %ebx
+ jle .L18
+ testl %r12d, %r12d
+ jne .L19
+ xorl %eax, %eax
+ decl %r15d
+ movl $4, -204(%rbp)
+ movq $0, -200(%rbp)
+ jle .L21
+ leaq -168(%rbp), %r8
+ leaq -176(%rbp), %rcx
+ leaq -184(%rbp), %rdx
+ leaq -192(%rbp), %rsi
+ movl $2, %edi
+ xorl %ebx, %ebx
+ call freebl_cpuid
+ movq -192(%rbp), %rdi
+ movl %edi, %r12d
+ andl $15, %r12d
+ cmpl %r12d, %ebx
+ jl .L30
+ jmp .L38
+ .align 16
+.L25:
+ movq -184(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L40
+.L26:
+ movq -176(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L41
+.L27:
+ movq -168(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L42
+.L28:
+ incl %ebx
+ cmpl %r12d, %ebx
+ je .L24
+ leaq -168(%rbp), %r8
+ leaq -176(%rbp), %rcx
+ leaq -184(%rbp), %rdx
+ leaq -192(%rbp), %rsi
+ movl $2, %edi
+ call freebl_cpuid
+.L24:
+ cmpl %r12d, %ebx
+ jge .L38
+ movq -192(%rbp), %rdi
+.L30:
+ testl $2147483648, %edi
+ jne .L25
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ andl $4294967040, %edi
+ call getIntelRegisterCacheLineSize
+ movq -184(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L26
+.L40:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ movq -176(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L27
+.L41:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ movq -168(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L28
+.L42:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ jmp .L28
+.L38:
+ movq -200(%rbp), %rax
+.L21:
+ movq %rax, %rdx
+ movl $32, %eax
+ testq %rdx, %rdx
+ cmoveq %rax, %rdx
+ addq $200, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ leave
+ movq %rdx, %rax
+ ret
+.L19:
+ leaq -216(%rbp), %r8
+ leaq -224(%rbp), %rcx
+ leaq -232(%rbp), %rdx
+ leaq -240(%rbp), %rsi
+ movl $2147483648, %edi
+ xorl %ebx, %ebx
+ call freebl_cpuid
+ movl $2147483652, %eax
+ cmpq %rax, -240(%rbp)
+ ja .L43
+.L32:
+ movq %rbx, %rdx
+ movl $32, %eax
+ testq %rdx, %rdx
+ cmoveq %rax, %rdx
+ addq $200, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ leave
+ movq %rdx, %rax
+ ret
+.L43:
+ leaq -216(%rbp), %r8
+ leaq -224(%rbp), %rcx
+ leaq -232(%rbp), %rdx
+ leaq -240(%rbp), %rsi
+ movl $2147483653, %edi
+ call freebl_cpuid
+ movzbq -224(%rbp), %rbx
+ jmp .L32
+.LFE7:
+ .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
new file mode 100644
index 0000000000..af17ebcb42
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
@@ -0,0 +1,902 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "mpcpucache.c"
+/ .section .rodata.str1.1,"aMS",@progbits,1
+ .section .rodata
+.LC0:
+ .string "GenuineIntel"
+.LC1:
+ .string "AuthenticAMD"
+.LC2:
+ .string "CyrixInstead"
+.LC3:
+ .string "CentaurHauls"
+.LC4:
+ .string "NexGenDriven"
+.LC5:
+ .string "GenuineTMx86"
+.LC6:
+ .string "RiseRiseRise"
+.LC7:
+ .string "UMC UMC UMC "
+.LC8:
+ .string "Sis Sis Sis "
+.LC9:
+ .string "Geode by NSC"
+ .section .data.rel.ro.local,"aw",@progbits
+ .align 32
+ .type manMap, @object
+ .size manMap, 40
+manMap:
+ .long .LC0
+ .long .LC1
+ .long .LC2
+ .long .LC3
+ .long .LC4
+ .long .LC5
+ .long .LC6
+ .long .LC7
+ .long .LC8
+ .long .LC9
+ .section .rodata
+ .align 32
+ .type CacheMap, @object
+ .size CacheMap, 512
+CacheMap:
+ .byte 0
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .zero 1
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .text
+ .align 4
+.globl freebl_cpuid
+ .type freebl_cpuid, @function
+freebl_cpuid:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ subl $8, %esp
+ movl %edx, %ebp
+/APP
+ pushl %ebx
+ xorl %ecx, %ecx
+ cpuid
+ mov %ebx,%esi
+ popl %ebx
+
+/NO_APP
+ movl %eax, (%ebp)
+ movl 24(%esp), %eax
+ movl %esi, (%eax)
+ movl 28(%esp), %eax
+ movl %ecx, (%eax)
+ movl 32(%esp), %eax
+ movl %edx, (%eax)
+ addl $8, %esp
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+ .size freebl_cpuid, .-freebl_cpuid
+ .align 4
+ .type changeFlag, @function
+changeFlag:
+/APP
+ pushfl
+ popl %edx
+ movl %edx,%ecx
+ xorl %eax,%edx
+ pushl %edx
+ popfl
+ pushfl
+ popl %edx
+ pushl %ecx
+ popfl
+
+/NO_APP
+ xorl %ecx, %edx
+ movl %edx, %eax
+ ret
+ .size changeFlag, .-changeFlag
+ .align 4
+ .type getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ call .L17
+.L17:
+ popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx
+ movzbl CacheMap@GOTOFF(%ebx,%eax,2), %ecx
+ movb 1+CacheMap@GOTOFF(%ebx,%eax,2), %al
+ testb %al, %al
+ movl 16(%esp), %edi
+ je .L3
+ cmpl $6, %ecx
+ je .L6
+ cmpl $8, %ecx
+ je .L6
+ movl (%edx), %esi
+ cmpl $1, %esi
+ jg .L15
+.L8:
+ cmpl $2, %esi
+ jle .L3
+ cmpl $12, %ecx
+ je .L12
+ cmpl $14, %ecx
+ je .L12
+ .align 4
+.L3:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+ .align 4
+.L6:
+ movzbl %al, %eax
+ movl $1, (%edx)
+ movl %eax, (%edi)
+.L16:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+ .align 4
+.L15:
+ cmpl $9, %ecx
+ je .L9
+ cmpl $11, %ecx
+ jne .L8
+.L9:
+ movzbl %al, %eax
+ movl $2, (%edx)
+ movl %eax, (%edi)
+ jmp .L16
+.L12:
+ movzbl %al, %eax
+ movl $3, (%edx)
+ movl %eax, (%edi)
+ jmp .L16
+ .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+ .align 4
+ .type getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ movl 8(%ebp), %edi
+ movl %eax, %esi
+ movl %edx, -12(%ebp)
+ shrl $24, %eax
+ pushl %edi
+ call getIntelCacheEntryLineSize
+ movl %esi, %eax
+ pushl %edi
+ shrl $16, %eax
+ movl -12(%ebp), %edx
+ andl $255, %eax
+ call getIntelCacheEntryLineSize
+ pushl %edi
+ movl %esi, %edx
+ movzbl %dh, %eax
+ movl -12(%ebp), %edx
+ call getIntelCacheEntryLineSize
+ andl $255, %esi
+ movl %edi, 8(%ebp)
+ movl -12(%ebp), %edx
+ addl $12, %esp
+ leal -8(%ebp), %esp
+ movl %esi, %eax
+ popl %esi
+ popl %edi
+ leave
+ jmp getIntelCacheEntryLineSize
+ .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+ .align 4
+.globl s_mpi_getProcessorLineSize
+ .type s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ subl $188, %esp
+ call .L52
+.L52:
+ popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx
+ movl $9, -168(%ebp)
+ movl $262144, %eax
+ call changeFlag
+ xorl %edx, %edx
+ testl %eax, %eax
+ jne .L50
+.L19:
+ leal -12(%ebp), %esp
+ popl %ebx
+ popl %esi
+ movl %edx, %eax
+ popl %edi
+ leave
+ ret
+ .align 4
+.L50:
+ movl $2097152, %eax
+ call changeFlag
+ testl %eax, %eax
+ movl $32, %edx
+ je .L19
+ leal -108(%ebp), %eax
+ pushl %eax
+ leal -112(%ebp), %eax
+ pushl %eax
+ leal -116(%ebp), %eax
+ pushl %eax
+ leal -120(%ebp), %edx
+ xorl %eax, %eax
+ call freebl_cpuid
+ movl -120(%ebp), %eax
+ movl %eax, -164(%ebp)
+ movl -116(%ebp), %eax
+ movl %eax, -104(%ebp)
+ movl -108(%ebp), %eax
+ movl %eax, -100(%ebp)
+ movl -112(%ebp), %eax
+ movl %eax, -96(%ebp)
+ movb $0, -92(%ebp)
+ xorl %esi, %esi
+ addl $12, %esp
+ leal -104(%ebp), %edi
+ .align 4
+.L28:
+ subl $8, %esp
+ pushl %edi
+ pushl manMap@GOTOFF(%ebx,%esi,4)
+ call strcmp@PLT
+ addl $16, %esp
+ testl %eax, %eax
+ jne .L26
+ movl %esi, -168(%ebp)
+.L26:
+ incl %esi
+ cmpl $9, %esi
+ jle .L28
+ movl -168(%ebp), %eax
+ testl %eax, %eax
+ jne .L29
+ xorl %eax, %eax
+ cmpl $1, -164(%ebp)
+ movl $4, -144(%ebp)
+ movl $0, -140(%ebp)
+ jle .L41
+ leal -124(%ebp), %edx
+ movl %edx, -188(%ebp)
+ leal -128(%ebp), %eax
+ pushl %edx
+ movl %eax, -184(%ebp)
+ leal -132(%ebp), %edx
+ pushl %eax
+ movl %edx, -180(%ebp)
+ movl $2, %eax
+ pushl %edx
+ leal -136(%ebp), %edx
+ call freebl_cpuid
+ movl -136(%ebp), %eax
+ movl %eax, %edi
+ andl $15, %edi
+ xorl %esi, %esi
+ addl $12, %esp
+ leal -140(%ebp), %edx
+ cmpl %edi, %esi
+ movl %edx, -176(%ebp)
+ jl .L40
+ jmp .L48
+ .align 4
+.L49:
+ movl -136(%ebp), %eax
+.L40:
+ testl %eax, %eax
+ js .L35
+ xorb %al, %al
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L35:
+ movl -132(%ebp), %eax
+ testl %eax, %eax
+ js .L36
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L36:
+ movl -128(%ebp), %eax
+ testl %eax, %eax
+ js .L37
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L37:
+ movl -124(%ebp), %eax
+ testl %eax, %eax
+ js .L38
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L38:
+ incl %esi
+ cmpl %edi, %esi
+ je .L34
+ pushl -188(%ebp)
+ pushl -184(%ebp)
+ pushl -180(%ebp)
+ leal -136(%ebp), %edx
+ movl $2, %eax
+ call freebl_cpuid
+ addl $12, %esp
+.L34:
+ cmpl %edi, %esi
+ jl .L49
+.L48:
+ movl -140(%ebp), %eax
+.L41:
+ testl %eax, %eax
+ jne .L44
+ movb $32, %al
+.L44:
+ leal -12(%ebp), %esp
+ popl %ebx
+ popl %esi
+ movl %eax, %edx
+ movl %edx, %eax
+ popl %edi
+ leave
+ ret
+.L29:
+ leal -148(%ebp), %eax
+ movl %eax, -192(%ebp)
+ movl $0, -172(%ebp)
+ leal -152(%ebp), %edi
+ pushl %eax
+ pushl %edi
+ leal -156(%ebp), %esi
+ pushl %esi
+ leal -160(%ebp), %edx
+ movl $-2147483648, %eax
+ call freebl_cpuid
+ addl $12, %esp
+ cmpl $-2147483644, -160(%ebp)
+ ja .L51
+.L42:
+ movl -172(%ebp), %eax
+ jmp .L41
+.L51:
+ pushl -192(%ebp)
+ pushl %edi
+ pushl %esi
+ leal -160(%ebp), %edx
+ movl $-2147483643, %eax
+ call freebl_cpuid
+ movzbl -152(%ebp), %edx
+ addl $12, %esp
+ movl %edx, -172(%ebp)
+ jmp .L42
+ .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h
new file mode 100644
index 0000000000..0cc868a14b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-config.h
@@ -0,0 +1,56 @@
+/* Default configuration for MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_CONFIG_H_
+#define MPI_CONFIG_H_
+
+/*
+ For boolean options,
+ 0 = no
+ 1 = yes
+
+ Other options are documented individually.
+
+ */
+
+#ifndef MP_IOFUNC
+#define MP_IOFUNC 0 /* include mp_print() ? */
+#endif
+
+#ifndef MP_MODARITH
+#define MP_MODARITH 1 /* include modular arithmetic ? */
+#endif
+
+#ifndef MP_LOGTAB
+#define MP_LOGTAB 1 /* use table of logs instead of log()? */
+#endif
+
+#ifndef MP_ARGCHK
+/*
+ 0 = no parameter checks
+ 1 = runtime checks, continue execution and return an error to caller
+ 2 = assertions; dump core on parameter errors
+ */
+#ifdef DEBUG
+#define MP_ARGCHK 2 /* how to check input arguments */
+#else
+#define MP_ARGCHK 1 /* how to check input arguments */
+#endif
+#endif
+
+#ifndef MP_DEBUG
+#define MP_DEBUG 0 /* print diagnostic output? */
+#endif
+
+#ifndef MP_DEFPREC
+#define MP_DEFPREC 64 /* default precision, in digits */
+#endif
+
+#ifndef MP_SQUARE
+#define MP_SQUARE 1 /* use separate squaring code? */
+#endif
+
+#endif /* ifndef MPI_CONFIG_H_ */
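
The MP_ARGCHK setting above is consumed by an ARGCHK macro defined in mpi.h (not shown in this hunk). The sketch below shows how such a macro typically expands under the three settings; it is illustrative rather than the literal NSS definition:

    #include <assert.h>

    #if MP_ARGCHK == 0
    #define ARGCHK(X, Y) /* no checking: trust the caller */
    #elif MP_ARGCHK == 1
    /* detect bad arguments at runtime and return an error code */
    #define ARGCHK(X, Y)    \
        do {                \
            if (!(X))       \
                return (Y); \
        } while (0)
    #else
    /* MP_ARGCHK == 2: treat bad arguments as fatal programming errors */
    #define ARGCHK(X, Y) assert(X)
    #endif
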
diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h
new file mode 100644
index 0000000000..b4333fb6b4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-priv.h
@@ -0,0 +1,246 @@
+/*
+ * mpi-priv.h - Private header file for MPI
+ * Arbitrary precision integer arithmetic library
+ *
+ * NOTE WELL: the content of this header file is NOT part of the "public"
+ * API for the MPI library, and may change at any time.
+ * Application programs that use libmpi should NOT include this header file.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef _MPI_PRIV_H_
+#define _MPI_PRIV_H_ 1
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if MP_DEBUG
+#include <stdio.h>
+
+#define DIAG(T, V) \
+ { \
+ fprintf(stderr, T); \
+ mp_print(V, stderr); \
+ fputc('\n', stderr); \
+ }
+#else
+#define DIAG(T, V)
+#endif
+
+/* If we aren't using a wired-in logarithm table, we need to include
+ the math library to get the log() function
+ */
+
+/* {{{ s_logv_2[] - log table for 2 in various bases */
+
+#if MP_LOGTAB
+/*
+ A table of the logs of 2 for various bases (the 0 and 1 entries of
+ this table are meaningless and should not be referenced).
+
+ This table is used to compute output lengths for the mp_toradix()
+ function. Since a number n in radix r takes up about log_r(n)
+ digits, we estimate the output size by taking the least integer
+ greater than log_r(n), where:
+
+ log_r(n) = log_2(n) * log_r(2)
+
+ This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+ which are the output bases supported.
+ */
+
+extern const float s_logv_2[];
+#define LOG_V_2(R) s_logv_2[(R)]
+
+#else
+
+/*
+ If MP_LOGTAB is not defined, use the math library to compute the
+ logarithms on the fly. Otherwise, use the table.
+ Pick which works best for your system.
+ */
+
+#include <math.h>
+#define LOG_V_2(R) (log(2.0) / log(R))
+
+#endif /* if MP_LOGTAB */
+
+/* }}} */
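
As a worked instance of the estimate described above: a 1024-bit number printed in base 10 needs about 1024 * log_10(2), that is 1024 * 0.30103, roughly 308.3, so 309 digits, plus room for a sign and a terminating NUL. A minimal check of the arithmetic:

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        int bits = 1024, radix = 10;
        /* least integer >= log_r(n) = log_2(n) * log_r(2) */
        int digits = (int)ceil(bits * (log(2.0) / log((double)radix)));

        printf("a %d-bit value needs at most %d base-%d digits\n",
               bits, digits, radix); /* 309 */
        return 0;
    }
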
+
+/* {{{ Digit arithmetic macros */
+
+/*
+ When adding and multiplying digits, the results can be larger than
+ can be contained in an mp_digit. Thus, an mp_word is used. These
+ macros mask off the upper and lower digits of the mp_word (the
+ mp_word may be more than 2 mp_digits wide, but we only concern
+ ourselves with the low-order 2 mp_digits)
+ */
+
+#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT)
+#define ACCUM(W) (mp_digit)(W)
+
+#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MP_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b))
+#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b))
+
+/* }}} */
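
To see CARRYOUT and ACCUM in action, here is a toy version with 16-bit digits and a 32-bit word (the real mp_digit width is platform-dependent), splitting a full digit-by-digit product into its low digit and carry:

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_DIGIT_BIT 16
    #define TOY_CARRYOUT(W) (uint16_t)((W) >> TOY_DIGIT_BIT)
    #define TOY_ACCUM(W) (uint16_t)(W)

    int
    main(void)
    {
        /* 0xffff * 0xffff = 0xfffe0001: low digit 0x0001, carry 0xfffe */
        uint32_t w = (uint32_t)0xffff * 0xffff;

        printf("accum=0x%04x carry=0x%04x\n", TOY_ACCUM(w), TOY_CARRYOUT(w));
        return 0;
    }
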
+
+/* {{{ Comparison constants */
+
+#define MP_LT -1
+#define MP_EQ 0
+#define MP_GT 1
+
+/* }}} */
+
+/* {{{ private function declarations */
+
+void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */
+void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */
+void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */
+void s_mp_free(void *ptr); /* general free function */
+
+mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
+mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */
+
+void s_mp_clamp(mp_int *mp); /* clip leading zeroes */
+
+void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */
+
+mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */
+void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */
+mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
+void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */
+void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */
+void s_mp_div_2(mp_int *mp); /* divide by 2 in place */
+mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */
+mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
+/* normalize for division */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
+mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
+mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
+/* unsigned digit divide */
+mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
+/* Barrett reduction */
+mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
+mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
+mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
+/* a += b * RADIX^offset */
+mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
+#if MP_SQUARE
+mp_err s_mp_sqr(mp_int *a); /* magnitude square */
+#else
+#define s_mp_sqr(a) s_mp_mul(a, a)
+#endif
+mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
+mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */
+int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
+int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */
+int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */
+int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */
+
+int s_mp_tovalue(char ch, int r); /* convert ch to value */
+char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
+int s_mp_outlen(int bits, int r); /* output length in bytes */
+mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */
+mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c);
+mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
+
+#ifdef NSS_USE_COMBA
+PR_STATIC_ASSERT(sizeof(mp_digit) == 8);
+#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1)))
+
+void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+#endif /* end NSS_USE_COMBA */
+
+/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
+#if defined(__OS2__) && defined(__IBMC__)
+#define MPI_ASM_DECL __cdecl
+#else
+#define MPI_ASM_DECL
+#endif
+
+#ifdef MPI_AMD64
+
+mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit);
+mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit);
+
+/* c = a * b */
+#define s_mpv_mul_d(a, a_len, b, c) \
+ ((mp_digit *)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b)
+
+/* c += a * b */
+#define s_mpv_mul_d_add(a, a_len, b, c) \
+ ((mp_digit *)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b)
+
+#else
+
+void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c);
+
+#endif
+
+void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
+ mp_size a_len, mp_digit b,
+ mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
+ mp_size a_len, mp_digit b,
+ mp_digit *c, mp_size c_len);
+void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
+ mp_size a_len,
+ mp_digit *sqrs);
+
+mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
+ mp_digit divisor, mp_digit *quot, mp_digit *rem);
+
+/* c += a * b * (MP_RADIX ** offset); */
+/* Callers of this macro should be aware that the return type might vary;
+ * it should be treated as a void function. */
+#define s_mp_mul_d_add_offset(a, b, c, off) \
+ s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off)
+
+typedef struct {
+ mp_int N; /* modulus N */
+ mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */
+} mp_mont_modulus;
+
+mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+ mp_mont_modulus *mmm);
+mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm);
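
To make the n0prime field concrete: Montgomery reduction needs n0' = -(n0 ** -1) mod R, where R = MP_RADIX and n0 is the low digit of N (N must be odd). With toy 8-bit digits (R = 256) and n0 = 23, the inverse of 23 mod 256 is 167 (23 * 167 = 3841, which is 1 mod 256), so n0' = 256 - 167 = 89. Below is a sketch of the standard Hensel-lifting computation at that toy width; it is illustrative, not NSS's actual routine:

    #include <assert.h>
    #include <stdint.h>

    /* n0' = -(n0 ** -1) mod 2^8, for odd n0. Each lifting step doubles
     * the number of correct low bits; odd^2 == 1 (mod 8) seeds 3 bits. */
    static uint8_t
    toy_n0prime(uint8_t n0)
    {
        uint8_t x = n0;          /* inverse mod 2^3 */
        x *= 2 - n0 * x;         /* inverse mod 2^6 */
        x *= 2 - n0 * x;         /* inverse mod 2^12, hence mod 2^8 too */
        return (uint8_t)(0 - x); /* negate to get n0' */
    }

    int
    main(void)
    {
        uint8_t n0 = 23, n0p = toy_n0prime(n0);

        assert(n0p == 89);
        assert((uint8_t)(n0 * n0p) == 0xff); /* n0 * n0' == -1 (mod 256) */
        return 0;
    }
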
+
+/*
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * see mpcpucache.c for the implementation.
+ */
+unsigned long s_mpi_getProcessorLineSize();
+
+/* }}} */
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
new file mode 100644
index 0000000000..7749dc710f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -0,0 +1,5241 @@
+/*
+ * mpi.c
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+#include <assert.h>
+
+#if defined(__arm__) && \
+ ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
+/* The inline assembler versions don't work with 16-bit Thumb or ARM v3. */
+#undef MP_ASSEMBLY_MULTIPLY
+#undef MP_ASSEMBLY_SQUARE
+#endif
+
+#if MP_LOGTAB
+/*
+ A table of the logs of 2 for various bases (the 0 and 1 entries of
+ this table are meaningless and should not be referenced).
+
+ This table is used to compute output lengths for the mp_toradix()
+ function. Since a number n in radix r takes up about log_r(n)
+ digits, we estimate the output size by taking the least integer
+ greater than log_r(n), where:
+
+ log_r(n) = log_2(n) * log_r(2)
+
+ This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+ which are the output bases supported.
+ */
+#include "logtab.h"
+#endif
+
+#ifdef CT_VERIF
+#include <valgrind/memcheck.h>
+#endif
+
+/* {{{ Constant strings */
+
+/* Constant strings returned by mp_strerror() */
+static const char *mp_err_string[] = {
+ "unknown result code", /* say what? */
+ "boolean true", /* MP_OKAY, MP_YES */
+ "boolean false", /* MP_NO */
+ "out of memory", /* MP_MEM */
+ "argument out of range", /* MP_RANGE */
+ "invalid input parameter", /* MP_BADARG */
+ "result is undefined" /* MP_UNDEF */
+};
+
+/* Value to digit maps for radix conversion */
+
+/* s_dmap_1 - standard digits and letters */
+static const char *s_dmap_1 =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* }}} */
+
+/* {{{ Default precision manipulation */
+
+/* Default precision for newly created mp_int's */
+static mp_size s_mp_defprec = MP_DEFPREC;
+
+mp_size
+mp_get_prec(void)
+{
+ return s_mp_defprec;
+
+} /* end mp_get_prec() */
+
+void
+mp_set_prec(mp_size prec)
+{
+ if (prec == 0)
+ s_mp_defprec = MP_DEFPREC;
+ else
+ s_mp_defprec = prec;
+
+} /* end mp_set_prec() */
+
+/* }}} */
+
+#ifdef CT_VERIF
+void
+mp_taint(mp_int *mp)
+{
+ size_t i;
+ for (i = 0; i < mp->used; ++i) {
+ VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
+ }
+}
+
+void
+mp_untaint(mp_int *mp)
+{
+ size_t i;
+ for (i = 0; i < mp->used; ++i) {
+ VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
+ }
+}
+#endif
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_init(mp) */
+
+/*
+ mp_init(mp)
+
+ Initialize a new zero-valued mp_int. Returns MP_OKAY if successful,
+ MP_MEM if memory could not be allocated for the structure.
+ */
+
+mp_err
+mp_init(mp_int *mp)
+{
+ return mp_init_size(mp, s_mp_defprec);
+
+} /* end mp_init() */
+
+/* }}} */
+
+/* {{{ mp_init_size(mp, prec) */
+
+/*
+ mp_init_size(mp, prec)
+
+ Initialize a new zero-valued mp_int with at least the given
+ precision; returns MP_OKAY if successful, or MP_MEM if memory could
+ not be allocated for the structure.
+ */
+
+mp_err
+mp_init_size(mp_int *mp, mp_size prec)
+{
+ ARGCHK(mp != NULL && prec > 0, MP_BADARG);
+
+ prec = MP_ROUNDUP(prec, s_mp_defprec);
+ if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
+ return MP_MEM;
+
+ SIGN(mp) = ZPOS;
+ USED(mp) = 1;
+ ALLOC(mp) = prec;
+
+ return MP_OKAY;
+
+} /* end mp_init_size() */
+
+/* }}} */
+
+/* {{{ mp_init_copy(mp, from) */
+
+/*
+ mp_init_copy(mp, from)
+
+ Initialize mp as an exact copy of from. Returns MP_OKAY if
+ successful, MP_MEM if memory could not be allocated for the new
+ structure.
+ */
+
+mp_err
+mp_init_copy(mp_int *mp, const mp_int *from)
+{
+ ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+
+ if (mp == from)
+ return MP_OKAY;
+
+ if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
+ return MP_MEM;
+
+ s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
+ USED(mp) = USED(from);
+ ALLOC(mp) = ALLOC(from);
+ SIGN(mp) = SIGN(from);
+
+ return MP_OKAY;
+
+} /* end mp_init_copy() */
+
+/* }}} */
+
+/* {{{ mp_copy(from, to) */
+
+/*
+ mp_copy(from, to)
+
+ Copies the mp_int 'from' to the mp_int 'to'. It is presumed that
+ 'to' has already been initialized (if not, use mp_init_copy()
+ instead). If 'from' and 'to' are identical, nothing happens.
+ */
+
+mp_err
+mp_copy(const mp_int *from, mp_int *to)
+{
+ ARGCHK(from != NULL && to != NULL, MP_BADARG);
+
+ if (from == to)
+ return MP_OKAY;
+
+ { /* copy */
+ mp_digit *tmp;
+
+ /*
+ If the allocated buffer in 'to' already has enough space to hold
+ all the used digits of 'from', we'll re-use it to avoid hitting
+          the memory allocator more than necessary; otherwise, we'd have
+ to grow anyway, so we just allocate a hunk and make the copy as
+ usual
+ */
+ if (ALLOC(to) >= USED(from)) {
+ s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
+ s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
+
+ } else {
+ if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
+ return MP_MEM;
+
+ s_mp_copy(DIGITS(from), tmp, USED(from));
+
+ if (DIGITS(to) != NULL) {
+ s_mp_setz(DIGITS(to), ALLOC(to));
+ s_mp_free(DIGITS(to));
+ }
+
+ DIGITS(to) = tmp;
+ ALLOC(to) = ALLOC(from);
+ }
+
+ /* Copy the precision and sign from the original */
+ USED(to) = USED(from);
+ SIGN(to) = SIGN(from);
+ } /* end copy */
+
+ return MP_OKAY;
+
+} /* end mp_copy() */
+
+/* }}} */
+
+/* {{{ mp_exch(mp1, mp2) */
+
+/*
+ mp_exch(mp1, mp2)
+
+ Exchange mp1 and mp2 without allocating any intermediate memory
+ (well, unless you count the stack space needed for this call and the
+ locals it creates...). This cannot fail.
+ */
+
+void
+mp_exch(mp_int *mp1, mp_int *mp2)
+{
+#if MP_ARGCHK == 2
+ assert(mp1 != NULL && mp2 != NULL);
+#else
+ if (mp1 == NULL || mp2 == NULL)
+ return;
+#endif
+
+ s_mp_exch(mp1, mp2);
+
+} /* end mp_exch() */
+
+/* }}} */
+
+/* {{{ mp_clear(mp) */
+
+/*
+ mp_clear(mp)
+
+ Release the storage used by an mp_int, and void its fields so that
+ if someone calls mp_clear() again for the same int later, we won't
+  crash.
+ */
+
+void
+mp_clear(mp_int *mp)
+{
+ if (mp == NULL)
+ return;
+
+ if (DIGITS(mp) != NULL) {
+ s_mp_setz(DIGITS(mp), ALLOC(mp));
+ s_mp_free(DIGITS(mp));
+ DIGITS(mp) = NULL;
+ }
+
+ USED(mp) = 0;
+ ALLOC(mp) = 0;
+
+} /* end mp_clear() */
+
+/* }}} */
+
+/* {{{ mp_zero(mp) */
+
+/*
+ mp_zero(mp)
+
+ Set mp to zero. Does not change the allocated size of the structure,
+ and therefore cannot fail (except on a bad argument, which we ignore)
+ */
+void
+mp_zero(mp_int *mp)
+{
+ if (mp == NULL)
+ return;
+
+ s_mp_setz(DIGITS(mp), ALLOC(mp));
+ USED(mp) = 1;
+ SIGN(mp) = ZPOS;
+
+} /* end mp_zero() */
+
+/* }}} */
+
+/* {{{ mp_set(mp, d) */
+
+void
+mp_set(mp_int *mp, mp_digit d)
+{
+ if (mp == NULL)
+ return;
+
+ mp_zero(mp);
+ DIGIT(mp, 0) = d;
+
+} /* end mp_set() */
+
+/* }}} */
+
+/* {{{ mp_set_int(mp, z) */
+
+mp_err
+mp_set_int(mp_int *mp, long z)
+{
+ unsigned long v = labs(z);
+ mp_err res;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
+ if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
+ return res;
+ }
+
+ if (z < 0) {
+ SIGN(mp) = NEG;
+ }
+
+ return MP_OKAY;
+} /* end mp_set_int() */
+
+/* }}} */
+
+/* {{{ mp_set_ulong(mp, z) */
+
+mp_err
+mp_set_ulong(mp_int *mp, unsigned long z)
+{
+ int ix;
+ mp_err res;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ mp_zero(mp);
+ if (z == 0)
+ return MP_OKAY; /* shortcut for zero */
+
+ if (sizeof z <= sizeof(mp_digit)) {
+ DIGIT(mp, 0) = z;
+ } else {
+ for (ix = sizeof(long) - 1; ix >= 0; ix--) {
+ if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
+ return res;
+
+ res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
+ if (res != MP_OKAY)
+ return res;
+ }
+ }
+ return MP_OKAY;
+} /* end mp_set_ulong() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Digit arithmetic */
+
+/* {{{ mp_add_d(a, d, b) */
+
+/*
+ mp_add_d(a, d, b)
+
+ Compute the sum b = a + d, for a single digit d. Respects the sign of
+ its primary addend (single digits are unsigned anyway).
+ */
+
+mp_err
+mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+ mp_int tmp;
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+
+ if (SIGN(&tmp) == ZPOS) {
+ if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
+ goto CLEANUP;
+ } else if (s_mp_cmp_d(&tmp, d) >= 0) {
+ if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
+ goto CLEANUP;
+ } else {
+ mp_neg(&tmp, &tmp);
+
+ DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
+ }
+
+ if (s_mp_cmp_d(&tmp, 0) == 0)
+ SIGN(&tmp) = ZPOS;
+
+ s_mp_exch(&tmp, b);
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+
+} /* end mp_add_d() */
+
+/* }}} */
+
+/* {{{ mp_sub_d(a, d, b) */
+
+/*
+ mp_sub_d(a, d, b)
+
+ Compute the difference b = a - d, for a single digit d. Respects the
+ sign of its subtrahend (single digits are unsigned anyway).
+ */
+
+mp_err
+mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+ mp_int tmp;
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+
+ if (SIGN(&tmp) == NEG) {
+ if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
+ goto CLEANUP;
+ } else if (s_mp_cmp_d(&tmp, d) >= 0) {
+ if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
+ goto CLEANUP;
+ } else {
+ mp_neg(&tmp, &tmp);
+
+ DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
+ SIGN(&tmp) = NEG;
+ }
+
+ if (s_mp_cmp_d(&tmp, 0) == 0)
+ SIGN(&tmp) = ZPOS;
+
+ s_mp_exch(&tmp, b);
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+
+} /* end mp_sub_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_d(a, d, b) */
+
+/*
+ mp_mul_d(a, d, b)
+
+ Compute the product b = a * d, for a single digit d. Respects the sign
+ of its multiplicand (single digits are unsigned anyway)
+ */
+
+mp_err
+mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if (d == 0) {
+ mp_zero(b);
+ return MP_OKAY;
+ }
+
+ if ((res = mp_copy(a, b)) != MP_OKAY)
+ return res;
+
+ res = s_mp_mul_d(b, d);
+
+ return res;
+
+} /* end mp_mul_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_2(a, c) */
+
+mp_err
+mp_mul_2(const mp_int *a, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, c)) != MP_OKAY)
+ return res;
+
+ return s_mp_mul_2(c);
+
+} /* end mp_mul_2() */
+
+/* }}} */
+
+/* {{{ mp_div_d(a, d, q, r) */
+
+/*
+ mp_div_d(a, d, q, r)
+
+ Compute the quotient q = a / d and remainder r = a mod d, for a
+ single digit d. Respects the sign of its divisor (single digits are
+ unsigned anyway).
+ */
+
+mp_err
+mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
+{
+ mp_err res;
+ mp_int qp;
+ mp_digit rem = 0;
+ int pow;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ if (d == 0)
+ return MP_RANGE;
+
+ /* Shortcut for powers of two ... */
+ if ((pow = s_mp_ispow2d(d)) >= 0) {
+ mp_digit mask;
+
+ mask = ((mp_digit)1 << pow) - 1;
+ rem = DIGIT(a, 0) & mask;
+
+ if (q) {
+ if ((res = mp_copy(a, q)) != MP_OKAY) {
+ return res;
+ }
+ s_mp_div_2d(q, pow);
+ }
+
+ if (r)
+ *r = rem;
+
+ return MP_OKAY;
+ }
+
+ if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
+ return res;
+
+ res = s_mp_div_d(&qp, d, &rem);
+
+    /* set the sign on qp, not on q: q may be NULL at this point */
+    if (s_mp_cmp_d(&qp, 0) == 0)
+        SIGN(&qp) = ZPOS;
+
+ if (r) {
+ *r = rem;
+ }
+
+ if (q)
+ s_mp_exch(&qp, q);
+
+ mp_clear(&qp);
+ return res;
+
+} /* end mp_div_d() */
+
+/* }}} */
+
+/* {{{ mp_div_2(a, c) */
+
+/*
+ mp_div_2(a, c)
+
+ Compute c = a / 2, disregarding the remainder.
+ */
+
+mp_err
+mp_div_2(const mp_int *a, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, c)) != MP_OKAY)
+ return res;
+
+ s_mp_div_2(c);
+
+ return MP_OKAY;
+
+} /* end mp_div_2() */
+
+/* }}} */
+
+/* {{{ mp_expt_d(a, d, b) */
+
+mp_err
+mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
+{
+ mp_int s, x;
+ mp_err res;
+
+ ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_init(&s)) != MP_OKAY)
+ return res;
+ if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+ goto X;
+
+ DIGIT(&s, 0) = 1;
+
+ while (d != 0) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d /= 2;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ s_mp_exch(&s, c);
+
+CLEANUP:
+ mp_clear(&x);
+X:
+ mp_clear(&s);
+
+ return res;
+
+} /* end mp_expt_d() */
+
+/* }}} */
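
mp_expt_d above is plain right-to-left binary exponentiation: each pass consumes one bit of d, folding the current square into the accumulator when the bit is set. The same loop with machine integers, computing 3^13 (13 is 1101 binary, so four squarings and three multiplies):

    #include <stdio.h>

    int
    main(void)
    {
        unsigned long x = 3, d = 13, s = 1;

        while (d != 0) {
            if (d & 1)
                s *= x; /* bit set: multiply accumulator by x^(2^i) */
            d /= 2;
            x *= x;     /* square for the next bit position */
        }
        printf("3^13 = %lu\n", s); /* 1594323 */
        return 0;
    }
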
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Full arithmetic */
+
+/* {{{ mp_abs(a, b) */
+
+/*
+ mp_abs(a, b)
+
+ Compute b = |a|. 'a' and 'b' may be identical.
+ */
+
+mp_err
+mp_abs(const mp_int *a, mp_int *b)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, b)) != MP_OKAY)
+ return res;
+
+ SIGN(b) = ZPOS;
+
+ return MP_OKAY;
+
+} /* end mp_abs() */
+
+/* }}} */
+
+/* {{{ mp_neg(a, b) */
+
+/*
+ mp_neg(a, b)
+
+ Compute b = -a. 'a' and 'b' may be identical.
+ */
+
+mp_err
+mp_neg(const mp_int *a, mp_int *b)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, b)) != MP_OKAY)
+ return res;
+
+ if (s_mp_cmp_d(b, 0) == MP_EQ)
+ SIGN(b) = ZPOS;
+ else
+ SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
+
+ return MP_OKAY;
+
+} /* end mp_neg() */
+
+/* }}} */
+
+/* {{{ mp_add(a, b, c) */
+
+/*
+ mp_add(a, b, c)
+
+ Compute c = a + b. All parameters may be identical.
+ */
+
+mp_err
+mp_add(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */
+ MP_CHECKOK(s_mp_add_3arg(a, b, c));
+ } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b| */
+ MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+ } else { /* different sign: |a| < |b| */
+ MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+ }
+
+ if (s_mp_cmp_d(c, 0) == MP_EQ)
+ SIGN(c) = ZPOS;
+
+CLEANUP:
+ return res;
+
+} /* end mp_add() */
+
+/* }}} */
+
+/* {{{ mp_sub(a, b, c) */
+
+/*
+ mp_sub(a, b, c)
+
+ Compute c = a - b. All parameters may be identical.
+ */
+
+mp_err
+mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_err res;
+ int magDiff;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (a == b) {
+ mp_zero(c);
+ return MP_OKAY;
+ }
+
+ if (MP_SIGN(a) != MP_SIGN(b)) {
+ MP_CHECKOK(s_mp_add_3arg(a, b, c));
+ } else if (!(magDiff = s_mp_cmp(a, b))) {
+ mp_zero(c);
+ res = MP_OKAY;
+ } else if (magDiff > 0) {
+ MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+ } else {
+ MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+ MP_SIGN(c) = !MP_SIGN(a);
+ }
+
+ if (s_mp_cmp_d(c, 0) == MP_EQ)
+ MP_SIGN(c) = MP_ZPOS;
+
+CLEANUP:
+ return res;
+
+} /* end mp_sub() */
+
+/* }}} */
+
+/* {{{ s_mp_mulg(a, b, c) */
+
+/*
+ s_mp_mulg(a, b, c)
+
+ Compute c = a * b. All parameters may be identical. If constantTime is
+ set, the operations are done in constant time. The original code is
+ mostly constant time as long as s_mpv_mul_d_add() is constant time;
+ this is true of the x86 assembler, as well as the current C code.
+ */
+mp_err
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
+{
+ mp_digit *pb;
+ mp_int tmp;
+ mp_err res;
+ mp_size ib;
+ mp_size useda, usedb;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (a == c) {
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+ if (a == b)
+ b = &tmp;
+ a = &tmp;
+ } else if (b == c) {
+ if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
+ return res;
+ b = &tmp;
+ } else {
+ MP_DIGITS(&tmp) = 0;
+ }
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+ b = a;
+ a = xch;
+ }
+
+ MP_USED(c) = 1;
+ MP_DIGIT(c, 0) = 0;
+ if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
+ goto CLEANUP;
+
+#ifdef NSS_USE_COMBA
+ /* comba isn't constant time because it clamps! If we needed a
+ * faster constant-time multiply, we could pass constantTime down
+ * to the comba code and have it skip the clamp... but there are
+ * also assembler versions which add comba to platforms that can't
+ * compile the normal comba's embedded assembler, and those would
+ * need to change as well, so for now we just skip comba when we
+ * are running constant time. */
+ if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
+ if (MP_USED(a) == 4) {
+ s_mp_mul_comba_4(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 8) {
+ s_mp_mul_comba_8(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 16) {
+ s_mp_mul_comba_16(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 32) {
+ s_mp_mul_comba_32(a, b, c);
+ goto CLEANUP;
+ }
+ }
+#endif
+
+ pb = MP_DIGITS(b);
+ s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
+
+ /* Outer loop: Digits of b */
+ useda = MP_USED(a);
+ usedb = MP_USED(b);
+ for (ib = 1; ib < usedb; ib++) {
+ mp_digit b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ if (constantTime || b_i)
+ s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+ else
+ MP_DIGIT(c, ib + useda) = b_i;
+ }
+
+ if (!constantTime) {
+ s_mp_clamp(c);
+ }
+
+ if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
+ SIGN(c) = ZPOS;
+ else
+ SIGN(c) = NEG;
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+} /* end s_mp_mulg() */
+
+/* }}} */
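+
+/*
+ For reference, a minimal standalone sketch of the schoolbook multiply
+ implemented by the loops above, using hypothetical 16-bit digits with
+ a 32-bit accumulator (not this library's types):
+
+     typedef uint16_t digit;
+     typedef uint32_t word;
+
+     void mul(const digit *a, int na, const digit *b, int nb, digit *c)
+     {
+         int i, j;
+         memset(c, 0, (na + nb) * sizeof(digit));
+         for (j = 0; j < nb; j++) {          outer loop: digits of b
+             word carry = 0;
+             for (i = 0; i < na; i++) {      inner product: digits of a
+                 word w = (word)a[i] * b[j] + c[i + j] + carry;
+                 c[i + j] = (digit)w;        low half of the product
+                 carry = w >> 16;            high half carries upward
+             }
+             c[j + na] = (digit)carry;
+         }
+     }
+
+ s_mpv_mul_d and s_mpv_mul_d_add each perform one pass of the outer
+ loop (without and with accumulation into c, respectively).
+ */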
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+ mp_mul(a, b, c)
+
+ Compute c = a * b. All parameters may be identical.
+ */
+
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ return s_mp_mulg(a, b, c, 0);
+} /* end mp_mul() */
+
+/* }}} */
+
+/* {{{ mp_mulCT(a, b, c) */
+
+/*
+ mp_mulCT(a, b, c)
+
+ Compute c = a * b in constant time. Parameters may not be identical.
+ NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
+{
+ mp_err res;
+
+ /* Make the multiply values fixed length so the multiply
+ * doesn't leak the length. At this point all the
+ * values are blinded, but once we finish we want the
+ * output size to be hidden as well (so no clamping of the output) */
+ MP_CHECKOK(s_mp_pad(a, setSize));
+ MP_CHECKOK(s_mp_pad(b, setSize));
+ MP_CHECKOK(s_mp_pad(c, 2 * setSize));
+ MP_CHECKOK(s_mp_mulg(a, b, c, 1));
+CLEANUP:
+ return res;
+} /* end mp_mulCT() */
+
+/* }}} */
+
+/* {{{ mp_sqr(a, sqr) */
+
+#if MP_SQUARE
+/*
+ Computes the square of a. This can be done more
+ efficiently than a general multiplication, because many of the
+ computation steps are redundant when squaring. The inner product
+ step is a bit more complicated, but we save a fair number of
+ iterations of the multiplication loop.
+ */
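+
+/*
+ For example, with two digits (a1*R + a0):
+
+     (a1*R + a0)^2 = a1^2*R^2 + 2*a1*a0*R + a0^2
+
+ Each cross product a_i*a_j (i != j) is computed once and then doubled
+ (the s_mp_mul_2 call below); the squares of the individual digits are
+ added in afterward by s_mpv_sqr_add_prop.
+ */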
+
+/* sqr = a^2; Caller provides both a and sqr; */
+mp_err
+mp_sqr(const mp_int *a, mp_int *sqr)
+{
+ mp_digit *pa;
+ mp_digit d;
+ mp_err res;
+ mp_size ix;
+ mp_int tmp;
+ int count;
+
+ ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
+
+ if (a == sqr) {
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+ a = &tmp;
+ } else {
+ DIGITS(&tmp) = 0;
+ res = MP_OKAY;
+ }
+
+ ix = 2 * MP_USED(a);
+ if (ix > MP_ALLOC(sqr)) {
+ MP_USED(sqr) = 1;
+ MP_CHECKOK(s_mp_grow(sqr, ix));
+ }
+ MP_USED(sqr) = ix;
+ MP_DIGIT(sqr, 0) = 0;
+
+#ifdef NSS_USE_COMBA
+ if (IS_POWER_OF_2(MP_USED(a))) {
+ if (MP_USED(a) == 4) {
+ s_mp_sqr_comba_4(a, sqr);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 8) {
+ s_mp_sqr_comba_8(a, sqr);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 16) {
+ s_mp_sqr_comba_16(a, sqr);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 32) {
+ s_mp_sqr_comba_32(a, sqr);
+ goto CLEANUP;
+ }
+ }
+#endif
+
+ pa = MP_DIGITS(a);
+ count = MP_USED(a) - 1;
+ if (count > 0) {
+ d = *pa++;
+ s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
+ for (ix = 3; --count > 0; ix += 2) {
+ d = *pa++;
+ s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
+ } /* for(ix ...) */
+ MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
+
+ /* now sqr *= 2 */
+ s_mp_mul_2(sqr);
+ } else {
+ MP_DIGIT(sqr, 1) = 0;
+ }
+
+ /* now add the squares of the digits of a to sqr. */
+ s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
+
+ SIGN(sqr) = ZPOS;
+ s_mp_clamp(sqr);
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+
+} /* end mp_sqr() */
+#endif
+
+/* }}} */
+
+/* {{{ mp_div(a, b, q, r) */
+
+/*
+ mp_div(a, b, q, r)
+
+ Compute q = a / b and r = a mod b. Input parameters may be re-used
+ as output parameters. If q or r is NULL, that portion of the
+ computation will be discarded (although it will still be computed).
+ */
+mp_err
+mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
+{
+ mp_err res;
+ mp_int *pQ, *pR;
+ mp_int qtmp, rtmp, btmp;
+ int cmp;
+ mp_sign signA;
+ mp_sign signB;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ signA = MP_SIGN(a);
+ signB = MP_SIGN(b);
+
+ if (mp_cmp_z(b) == MP_EQ)
+ return MP_RANGE;
+
+ DIGITS(&qtmp) = 0;
+ DIGITS(&rtmp) = 0;
+ DIGITS(&btmp) = 0;
+
+ /* Set up some temporaries... */
+ if (!r || r == a || r == b) {
+ MP_CHECKOK(mp_init_copy(&rtmp, a));
+ pR = &rtmp;
+ } else {
+ MP_CHECKOK(mp_copy(a, r));
+ pR = r;
+ }
+
+ if (!q || q == a || q == b) {
+ MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
+ pQ = &qtmp;
+ } else {
+ MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
+ pQ = q;
+ mp_zero(pQ);
+ }
+
+ /*
+ If |a| <= |b|, we can compute the solution without division;
+ otherwise, we actually do the work required.
+ */
+ if ((cmp = s_mp_cmp(a, b)) <= 0) {
+ if (cmp) {
+ /* r was set to a above. */
+ mp_zero(pQ);
+ } else {
+ mp_set(pQ, 1);
+ mp_zero(pR);
+ }
+ } else {
+ MP_CHECKOK(mp_init_copy(&btmp, b));
+ MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
+ }
+
+ /* Compute the signs for the output */
+ MP_SIGN(pR) = signA; /* Sr = Sa */
+ /* Sq = ZPOS if Sa == Sb, Sq = NEG if Sa != Sb */
+ MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
+
+ if (s_mp_cmp_d(pQ, 0) == MP_EQ)
+ SIGN(pQ) = ZPOS;
+ if (s_mp_cmp_d(pR, 0) == MP_EQ)
+ SIGN(pR) = ZPOS;
+
+ /* Copy output, if it is needed */
+ if (q && q != pQ)
+ s_mp_exch(pQ, q);
+
+ if (r && r != pR)
+ s_mp_exch(pR, r);
+
+CLEANUP:
+ mp_clear(&btmp);
+ mp_clear(&rtmp);
+ mp_clear(&qtmp);
+
+ return res;
+
+} /* end mp_div() */
+
+/* }}} */
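+
+/*
+ The sign rules above give truncated division, as in C: the remainder
+ takes the sign of a, and the quotient is negative iff the signs of a
+ and b differ. For example:
+
+     mp_div(-7,  2, &q, &r)  =>  q = -3, r = -1   (-7 =  2*(-3) - 1)
+     mp_div( 7, -2, &q, &r)  =>  q = -3, r =  1   ( 7 = -2*(-3) + 1)
+ */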
+
+/* {{{ mp_div_2d(a, d, q, r) */
+
+mp_err
+mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ if (q) {
+ if ((res = mp_copy(a, q)) != MP_OKAY)
+ return res;
+ }
+ if (r) {
+ if ((res = mp_copy(a, r)) != MP_OKAY)
+ return res;
+ }
+ if (q) {
+ s_mp_div_2d(q, d);
+ }
+ if (r) {
+ s_mp_mod_2d(r, d);
+ }
+
+ return MP_OKAY;
+
+} /* end mp_div_2d() */
+
+/* }}} */
+
+/* {{{ mp_expt(a, b, c) */
+
+/*
+ mp_expt(a, b, c)
+
+ Compute c = a ** b, that is, raise a to the b power. Uses a
+ standard iterative square-and-multiply technique.
+ */
+
+mp_err
+mp_expt(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_int s, x;
+ mp_err res;
+ mp_digit d;
+ unsigned int dig, bit;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (mp_cmp_z(b) < 0)
+ return MP_RANGE;
+
+ if ((res = mp_init(&s)) != MP_OKAY)
+ return res;
+
+ mp_set(&s, 1);
+
+ if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+ goto X;
+
+ /* Loop over low-order digits in ascending order */
+ for (dig = 0; dig < (USED(b) - 1); dig++) {
+ d = DIGIT(b, dig);
+
+ /* Loop over bits of each non-maximal digit */
+ for (bit = 0; bit < DIGIT_BIT; bit++) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d >>= 1;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+ }
+
+ /* Consider now the last digit... */
+ d = DIGIT(b, dig);
+
+ while (d) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d >>= 1;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ /* the result is negative only when a is negative and b is odd */
+ if (mp_isodd(b))
+ SIGN(&s) = SIGN(a);
+ else
+ SIGN(&s) = ZPOS;
+
+ res = mp_copy(&s, c);
+
+CLEANUP:
+ mp_clear(&x);
+X:
+ mp_clear(&s);
+
+ return res;
+
+} /* end mp_expt() */
+
+/* }}} */
+
+/* {{{ mp_2expt(a, k) */
+
+/* Compute a = 2^k */
+
+mp_err
+mp_2expt(mp_int *a, mp_digit k)
+{
+ ARGCHK(a != NULL, MP_BADARG);
+
+ return s_mp_2expt(a, k);
+
+} /* end mp_2expt() */
+
+/* }}} */
+
+/* {{{ mp_mod(a, m, c) */
+
+/*
+ mp_mod(a, m, c)
+
+ Compute c = a (mod m). Result will always be 0 <= c < m.
+ */
+
+mp_err
+mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+ int mag;
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if (SIGN(m) == NEG)
+ return MP_RANGE;
+
+ /*
+ If |a| > m, we need to divide to get the remainder and take the
+ absolute value.
+
+ If |a| < m, we don't need to do any division, just copy and adjust
+ the sign (if a is negative).
+
+ If |a| == m, we can simply set the result to zero.
+
+ This order is intended to minimize the average path length of the
+ comparison chain on common workloads -- the most frequent cases are
+ that |a| != m, so we do those first.
+ */
+ if ((mag = s_mp_cmp(a, m)) > 0) {
+ if ((res = mp_div(a, m, NULL, c)) != MP_OKAY)
+ return res;
+
+ if (SIGN(c) == NEG) {
+ if ((res = mp_add(c, m, c)) != MP_OKAY)
+ return res;
+ }
+
+ } else if (mag < 0) {
+ if ((res = mp_copy(a, c)) != MP_OKAY)
+ return res;
+
+ if (mp_cmp_z(a) < 0) {
+ if ((res = mp_add(c, m, c)) != MP_OKAY)
+ return res;
+ }
+
+ } else {
+ mp_zero(c);
+ }
+
+ return MP_OKAY;
+
+} /* end mp_mod() */
+
+/* }}} */
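+
+/*
+ Note the difference from the remainder produced by mp_div: mp_mod is
+ always non-negative. For example, a = -7, m = 5 gives c = 3 here
+ (since -7 = 5*(-2) + 3), whereas mp_div would leave r = -2.
+ */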
+
+/* {{{ s_mp_subCT_d(a, b, borrow, c) */
+
+/*
+ s_mp_subCT_d(a, b, borrow, ret)
+
+ Compute *ret = a - b - borrow in constant time; returns the borrow out.
+ */
+mp_digit
+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
+{
+ *ret = a - b - borrow;
+ return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
+} /* s_mp_subCT_d() */
+
+/* }}} */
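+
+/*
+ The borrow-out above is computed without branches: a borrow occurs
+ exactly when b + borrow > a, which shows up either as the wrapped
+ result exceeding a (the MP_CT_LTU term), or as the result equaling a
+ while a borrow came in (only possible when b + borrow is exactly
+ 2**MP_DIGIT_BIT).
+ */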
+
+/* {{{ mp_subCT(a, b, ret, borrow) */
+
+/* return ret= a - b and borrow in borrow. done in constant time.
+ * b could be modified.
+ */
+mp_err
+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
+{
+ mp_size used_a = MP_USED(a);
+ mp_size i;
+ mp_err res;
+
+ MP_CHECKOK(s_mp_pad(b, used_a));
+ MP_CHECKOK(s_mp_pad(ret, used_a));
+ *borrow = 0;
+ for (i = 0; i < used_a; i++) {
+ *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
+ &MP_DIGIT(ret, i));
+ }
+
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+} /* end mp_subCT() */
+
+/* }}} */
+
+/* {{{ mp_selectCT(cond, a, b, ret) */
+
+/*
+ * return ret= cond ? a : b; cond should be either 0 or 1
+ */
+mp_err
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
+{
+ mp_size used_a = MP_USED(a);
+ mp_err res;
+ mp_size i;
+
+ cond *= MP_DIGIT_MAX;
+
+ /* we currently require these to be equal on input,
+ * we could use pad to extend one of them, but that might
+ * leak data as it wouldn't be constant time */
+ if (used_a != MP_USED(b)) {
+ return MP_BADARG;
+ }
+
+ MP_CHECKOK(s_mp_pad(ret, used_a));
+ for (i = 0; i < used_a; i++) {
+ MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
+ }
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+} /* end mp_selectCT() */
+
+/* }}} */
+
+/* {{{ mp_reduceCT(a, m, c) */
+
+/*
+ mp_reduceCT(a, m, c)
+
+ Compute c = aR^-1 (mod m) in constant time.
+ input should be in montgomery form. If input is the
+ result of a montgomery multiply then out put will be
+ in mongomery form.
+ Result will be reduced to MP_USED(m), but not be
+ clamped.
+ */
+
+mp_err
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
+{
+ mp_size used_m = MP_USED(m);
+ mp_size used_c = used_m * 2 + 1;
+ mp_digit *m_digits, *c_digits;
+ mp_size i;
+ mp_digit borrow, carry;
+ mp_err res;
+ mp_int sub;
+
+ MP_DIGITS(&sub) = 0;
+ MP_CHECKOK(mp_init_size(&sub, used_m));
+
+ if (a != c) {
+ MP_CHECKOK(mp_copy(a, c));
+ }
+ MP_CHECKOK(s_mp_pad(c, used_c));
+ m_digits = MP_DIGITS(m);
+ c_digits = MP_DIGITS(c);
+ for (i = 0; i < used_m; i++) {
+ mp_digit m_i = MP_DIGIT(c, i) * n0i;
+ s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
+ }
+ s_mp_rshd(c, used_m);
+ /* MP_USED(c) should be used_m+1 with the high word being any carry
+ * from the previous multiply, save that carry and drop the high
+ * word for the subtraction below */
+ carry = MP_DIGIT(c, used_m);
+ MP_DIGIT(c, used_m) = 0;
+ MP_USED(c) = used_m;
+ /* mp_subCT wants c and m to be the same size, we've already
+ * guaranteed that in the previous statement, so mp_subCT won't actually
+ * modify m, so it's safe to recast */
+ MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
+
+ /* return c-m when c >= m: either there was no borrow, or there was
+ * both a borrow and a carry */
+ MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
+ res = MP_OKAY;
+CLEANUP:
+ mp_clear(&sub);
+ return res;
+} /* end mp_reduceCT() */
+
+/* }}} */
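+
+/*
+ The loop above is word-by-word Montgomery reduction. Writing
+ R = 2**MP_DIGIT_BIT, and with n0i holding -m**-1 mod R (the usual
+ Montgomery setup), each pass adds a multiple of m chosen to zero the
+ current low word; in ordinary arithmetic:
+
+     for i in 0 .. used_m-1:
+         m_i = (c mod R) * n0i mod R
+         c   = (c + m_i * m) / R      division is exact, low word is 0
+
+ The code keeps c in place and advances c_digits instead of dividing;
+ the single s_mp_rshd at the end accounts for all the divisions by R.
+ The result is congruent to a * R**-used_m (mod m).
+ */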
+
+/* {{{ mp_mod_d(a, d, c) */
+
+/*
+ mp_mod_d(a, d, c)
+
+ Compute c = a (mod d). Result will always be 0 <= c < d
+ */
+mp_err
+mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
+{
+ mp_err res;
+ mp_digit rem;
+
+ ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+ /* take the division path when |a| >= d, so that |a| == d yields 0 */
+ if (s_mp_cmp_d(a, d) >= 0) {
+ if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
+ return res;
+
+ } else {
+ if (SIGN(a) == NEG)
+ rem = d - DIGIT(a, 0);
+ else
+ rem = DIGIT(a, 0);
+ }
+
+ if (c)
+ *c = rem;
+
+ return MP_OKAY;
+
+} /* end mp_mod_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Modular arithmetic */
+
+#if MP_MODARITH
+/* {{{ mp_addmod(a, b, m, c) */
+
+/*
+ mp_addmod(a, b, m, c)
+
+ Compute c = (a + b) mod m
+ */
+
+mp_err
+mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_add(a, b, c)) != MP_OKAY)
+ return res;
+ if ((res = mp_mod(c, m, c)) != MP_OKAY)
+ return res;
+
+ return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_submod(a, b, m, c) */
+
+/*
+ mp_submod(a, b, m, c)
+
+ Compute c = (a - b) mod m
+ */
+
+mp_err
+mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_sub(a, b, c)) != MP_OKAY)
+ return res;
+ if ((res = mp_mod(c, m, c)) != MP_OKAY)
+ return res;
+
+ return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmod(a, b, m, c) */
+
+/*
+ mp_mulmod(a, b, m, c)
+
+ Compute c = (a * b) mod m
+ */
+
+mp_err
+mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_mul(a, b, c)) != MP_OKAY)
+ return res;
+ if ((res = mp_mod(c, m, c)) != MP_OKAY)
+ return res;
+
+ return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmontmodCT(a, b, m, c) */
+
+/*
+ mp_mulmontmodCT(a, b, m, c)
+
+ Compute c = (a * b) mod m in constant time with respect to a and b.
+ Either a or b should be in Montgomery form, in which case the output
+ is native. If both a and b are in Montgomery form, the output will
+ also be in Montgomery form and can be recovered with an mp_reduceCT
+ call.
+ NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
+ mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
+ return res;
+
+ if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
+ return res;
+
+ return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_sqrmod(a, m, c) */
+
+#if MP_SQUARE
+mp_err
+mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if ((res = mp_sqr(a, c)) != MP_OKAY)
+ return res;
+ if ((res = mp_mod(c, m, c)) != MP_OKAY)
+ return res;
+
+ return MP_OKAY;
+
+} /* end mp_sqrmod() */
+#endif
+
+/* }}} */
+
+/* {{{ s_mp_exptmod(a, b, m, c) */
+
+/*
+ s_mp_exptmod(a, b, m, c)
+
+ Compute c = (a ** b) mod m. Uses a standard square-and-multiply
+ method with modular reductions at each step. (This is basically the
+ same code as mp_expt(), except for the addition of the reductions)
+
+ The modular reductions are done using Barrett's algorithm (see
+ s_mp_reduce() below for details)
+ */
+
+mp_err
+s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+ mp_int s, x, mu;
+ mp_err res;
+ mp_digit d;
+ unsigned int dig, bit;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG);
+
+ if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
+ return MP_RANGE;
+
+ if ((res = mp_init(&s)) != MP_OKAY)
+ return res;
+ if ((res = mp_init_copy(&x, a)) != MP_OKAY ||
+ (res = mp_mod(&x, m, &x)) != MP_OKAY)
+ goto X;
+ if ((res = mp_init(&mu)) != MP_OKAY)
+ goto MU;
+
+ mp_set(&s, 1);
+
+ /* mu = b^2k / m */
+ if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
+ goto CLEANUP;
+
+ /* Loop over digits of b in ascending order, except highest order */
+ for (dig = 0; dig < (USED(b) - 1); dig++) {
+ d = DIGIT(b, dig);
+
+ /* Loop over the bits of the lower-order digits */
+ for (bit = 0; bit < DIGIT_BIT; bit++) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d >>= 1;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+ }
+
+ /* Now do the last digit... */
+ d = DIGIT(b, dig);
+
+ while (d) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d >>= 1;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ s_mp_exch(&s, c);
+
+CLEANUP:
+ mp_clear(&mu);
+MU:
+ mp_clear(&x);
+X:
+ mp_clear(&s);
+
+ return res;
+
+} /* end s_mp_exptmod() */
+
+/* }}} */
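+
+/*
+ For reference, the value computed above is mu = floor(b**(2k) / m)
+ with b = RADIX and k = USED(m); s_mp_reduce uses it to estimate
+ floor(x / m) with two multiplications in place of a full division.
+ */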
+
+/* {{{ mp_exptmod_d(a, d, m, c) */
+
+mp_err
+mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
+{
+ mp_int s, x;
+ mp_err res;
+
+ ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG);
+
+ if ((res = mp_init(&s)) != MP_OKAY)
+ return res;
+ if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+ goto X;
+
+ mp_set(&s, 1);
+
+ while (d != 0) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY ||
+ (res = mp_mod(&s, m, &s)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d /= 2;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY ||
+ (res = mp_mod(&x, m, &x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ s_mp_exch(&s, c);
+
+CLEANUP:
+ mp_clear(&x);
+X:
+ mp_clear(&s);
+
+ return res;
+
+} /* end mp_exptmod_d() */
+
+/* }}} */
+#endif /* if MP_MODARITH */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Comparison functions */
+
+/* {{{ mp_cmp_z(a) */
+
+/*
+ mp_cmp_z(a)
+
+ Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0.
+ */
+
+int
+mp_cmp_z(const mp_int *a)
+{
+ ARGMPCHK(a != NULL);
+
+ if (SIGN(a) == NEG)
+ return MP_LT;
+ else if (USED(a) == 1 && DIGIT(a, 0) == 0)
+ return MP_EQ;
+ else
+ return MP_GT;
+
+} /* end mp_cmp_z() */
+
+/* }}} */
+
+/* {{{ mp_cmp_d(a, d) */
+
+/*
+ mp_cmp_d(a, d)
+
+ Compare a <=> d. Returns <0 if a<d, 0 if a=d, >0 if a>d
+ */
+
+int
+mp_cmp_d(const mp_int *a, mp_digit d)
+{
+ ARGCHK(a != NULL, MP_EQ);
+
+ if (SIGN(a) == NEG)
+ return MP_LT;
+
+ return s_mp_cmp_d(a, d);
+
+} /* end mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ mp_cmp(a, b) */
+
+int
+mp_cmp(const mp_int *a, const mp_int *b)
+{
+ ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+ if (SIGN(a) == SIGN(b)) {
+ int mag;
+
+ if ((mag = s_mp_cmp(a, b)) == MP_EQ)
+ return MP_EQ;
+
+ if (SIGN(a) == ZPOS)
+ return mag;
+ else
+ return -mag;
+
+ } else if (SIGN(a) == ZPOS) {
+ return MP_GT;
+ } else {
+ return MP_LT;
+ }
+
+} /* end mp_cmp() */
+
+/* }}} */
+
+/* {{{ mp_cmp_mag(a, b) */
+
+/*
+ mp_cmp_mag(a, b)
+
+ Compares |a| <=> |b|, and returns an appropriate comparison result
+ */
+
+int
+mp_cmp_mag(const mp_int *a, const mp_int *b)
+{
+ ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+ return s_mp_cmp(a, b);
+
+} /* end mp_cmp_mag() */
+
+/* }}} */
+
+/* {{{ mp_isodd(a) */
+
+/*
+ mp_isodd(a)
+
+ Returns a true (non-zero) value if a is odd, false (zero) otherwise.
+ */
+int
+mp_isodd(const mp_int *a)
+{
+ ARGMPCHK(a != NULL);
+
+ return (int)(DIGIT(a, 0) & 1);
+
+} /* end mp_isodd() */
+
+/* }}} */
+
+/* {{{ mp_iseven(a) */
+
+int
+mp_iseven(const mp_int *a)
+{
+ return !mp_isodd(a);
+
+} /* end mp_iseven() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Number theoretic functions */
+
+/* {{{ mp_gcd(a, b, c) */
+
+/*
+ Computes the GCD using the constant-time algorithm
+ by Bernstein and Yang (https://eprint.iacr.org/2019/266)
+ "Fast constant-time gcd computation and modular inversion"
+ */
+mp_err
+mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_err res;
+ mp_digit cond = 0, mask = 0;
+ mp_int g, temp, f;
+ int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
+ mp_size top, flen, glen;
+ mp_int *clear[3];
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+ /*
+ Early exit if either of the inputs is zero.
+ Caller is responsible for the proper handling of inputs.
+ */
+ if (mp_cmp_z(a) == MP_EQ) {
+ res = mp_copy(b, c);
+ SIGN(c) = ZPOS;
+ return res;
+ } else if (mp_cmp_z(b) == MP_EQ) {
+ res = mp_copy(a, c);
+ SIGN(c) = ZPOS;
+ return res;
+ }
+
+ MP_CHECKOK(mp_init(&temp));
+ clear[++last] = &temp;
+ MP_CHECKOK(mp_init_copy(&g, a));
+ clear[++last] = &g;
+ MP_CHECKOK(mp_init_copy(&f, b));
+ clear[++last] = &f;
+
+ /*
+ For the even case, compute the number of
+ shared powers of 2 in f and g.
+ */
+ for (i = 0; i < USED(&f) && i < USED(&g); i++) {
+ mask = ~(DIGIT(&f, i) | DIGIT(&g, i));
+ for (j = 0; j < MP_DIGIT_BIT; j++) {
+ bit &= mask;
+ shifts += bit;
+ mask >>= 1;
+ }
+ }
+ /* Reduce to the odd case by removing the powers of 2. */
+ s_mp_div_2d(&f, shifts);
+ s_mp_div_2d(&g, shifts);
+
+ /* Allocate to the size of largest mp_int. */
+ top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+ MP_CHECKOK(s_mp_grow(&f, top));
+ MP_CHECKOK(s_mp_grow(&g, top));
+ MP_CHECKOK(s_mp_grow(&temp, top));
+
+ /* Make sure f contains the odd value. */
+ MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top));
+
+ /* Upper bound for the total iterations. */
+ flen = mpl_significant_bits(&f);
+ glen = mpl_significant_bits(&g);
+ m = 4 + 3 * ((flen >= glen) ? flen : glen);
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+
+ for (i = 0; i < m; i++) {
+ /* Step 1: conditional swap. */
+ /* Set cond if delta > 0 and g is odd. */
+ cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+ /* If cond is set replace (delta,f) with (-delta,-f). */
+ delta = (-cond & -delta) | ((cond - 1) & delta);
+ SIGN(&f) ^= cond;
+ /* If cond is set swap f with g. */
+ MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+
+ /* Step 2: elimination. */
+ /* Update delta. */
+ delta++;
+ /* If g is odd, right shift (g+f) else right shift g. */
+ MP_CHECKOK(mp_add(&g, &f, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+ s_mp_div_2(&g);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* GCD is in f, take the absolute value. */
+ SIGN(&f) = ZPOS;
+
+ /* Add back the removed powers of 2. */
+ MP_CHECKOK(s_mp_mul_2d(&f, shifts));
+
+ MP_CHECKOK(mp_copy(&f, c));
+
+CLEANUP:
+ while (last >= 0)
+ mp_clear(clear[last--]);
+ return res;
+} /* end mp_gcd() */
+
+/* }}} */
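+
+/*
+ The branch-free updates in the divstep loop above rely on two's
+ complement masking, with cond in {0, 1}:
+
+     delta' = (-cond & -delta) | ((cond - 1) & delta)
+
+ -cond is all ones when cond == 1 (selecting -delta), and cond - 1 is
+ all ones when cond == 0 (selecting delta), so this is a constant-time
+ form of: delta' = cond ? -delta : delta. mp_cswap applies the same
+ masking idea to swap f and g without branching.
+ */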
+
+/* {{{ mp_lcm(a, b, c) */
+
+/* We compute the least common multiple using the rule:
+
+ ab = [a, b](a, b)
+
+ ... by computing the product, and dividing out the gcd.
+ */
+
+mp_err
+mp_lcm(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_int gcd, prod;
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ /* Set up temporaries */
+ if ((res = mp_init(&gcd)) != MP_OKAY)
+ return res;
+ if ((res = mp_init(&prod)) != MP_OKAY)
+ goto GCD;
+
+ if ((res = mp_mul(a, b, &prod)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
+ goto CLEANUP;
+
+ res = mp_div(&prod, &gcd, c, NULL);
+
+CLEANUP:
+ mp_clear(&prod);
+GCD:
+ mp_clear(&gcd);
+
+ return res;
+
+} /* end mp_lcm() */
+
+/* }}} */
+
+/* {{{ mp_xgcd(a, b, g, x, y) */
+
+/*
+ mp_xgcd(a, b, g, x, y)
+
+ Compute g = (a, b) and values x and y satisfying Bezout's identity
+ (that is, ax + by = g). This uses the binary extended GCD algorithm
+ based on the Stein algorithm used for mp_gcd()
+ See algorithm 14.61 in Handbook of Applied Cryptography.
+ */
+
+mp_err
+mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
+{
+ mp_int gx, xc, yc, u, v, A, B, C, D;
+ mp_int *clean[9];
+ mp_err res;
+ int last = -1;
+
+ if (mp_cmp_z(b) == 0)
+ return MP_RANGE;
+
+ /* Initialize all these variables we need */
+ MP_CHECKOK(mp_init(&u));
+ clean[++last] = &u;
+ MP_CHECKOK(mp_init(&v));
+ clean[++last] = &v;
+ MP_CHECKOK(mp_init(&gx));
+ clean[++last] = &gx;
+ MP_CHECKOK(mp_init(&A));
+ clean[++last] = &A;
+ MP_CHECKOK(mp_init(&B));
+ clean[++last] = &B;
+ MP_CHECKOK(mp_init(&C));
+ clean[++last] = &C;
+ MP_CHECKOK(mp_init(&D));
+ clean[++last] = &D;
+ MP_CHECKOK(mp_init_copy(&xc, a));
+ clean[++last] = &xc;
+ mp_abs(&xc, &xc);
+ MP_CHECKOK(mp_init_copy(&yc, b));
+ clean[++last] = &yc;
+ mp_abs(&yc, &yc);
+
+ mp_set(&gx, 1);
+
+ /* Divide by two until at least one of them is odd */
+ while (mp_iseven(&xc) && mp_iseven(&yc)) {
+ mp_size nx = mp_trailing_zeros(&xc);
+ mp_size ny = mp_trailing_zeros(&yc);
+ mp_size n = MP_MIN(nx, ny);
+ s_mp_div_2d(&xc, n);
+ s_mp_div_2d(&yc, n);
+ MP_CHECKOK(s_mp_mul_2d(&gx, n));
+ }
+
+ MP_CHECKOK(mp_copy(&xc, &u));
+ MP_CHECKOK(mp_copy(&yc, &v));
+ mp_set(&A, 1);
+ mp_set(&D, 1);
+
+ /* Loop through binary GCD algorithm */
+ do {
+ while (mp_iseven(&u)) {
+ s_mp_div_2(&u);
+
+ if (mp_iseven(&A) && mp_iseven(&B)) {
+ s_mp_div_2(&A);
+ s_mp_div_2(&B);
+ } else {
+ MP_CHECKOK(mp_add(&A, &yc, &A));
+ s_mp_div_2(&A);
+ MP_CHECKOK(mp_sub(&B, &xc, &B));
+ s_mp_div_2(&B);
+ }
+ }
+
+ while (mp_iseven(&v)) {
+ s_mp_div_2(&v);
+
+ if (mp_iseven(&C) && mp_iseven(&D)) {
+ s_mp_div_2(&C);
+ s_mp_div_2(&D);
+ } else {
+ MP_CHECKOK(mp_add(&C, &yc, &C));
+ s_mp_div_2(&C);
+ MP_CHECKOK(mp_sub(&D, &xc, &D));
+ s_mp_div_2(&D);
+ }
+ }
+
+ if (mp_cmp(&u, &v) >= 0) {
+ MP_CHECKOK(mp_sub(&u, &v, &u));
+ MP_CHECKOK(mp_sub(&A, &C, &A));
+ MP_CHECKOK(mp_sub(&B, &D, &B));
+ } else {
+ MP_CHECKOK(mp_sub(&v, &u, &v));
+ MP_CHECKOK(mp_sub(&C, &A, &C));
+ MP_CHECKOK(mp_sub(&D, &B, &D));
+ }
+ } while (mp_cmp_z(&u) != 0);
+
+ /* copy results to output */
+ if (x)
+ MP_CHECKOK(mp_copy(&C, x));
+
+ if (y)
+ MP_CHECKOK(mp_copy(&D, y));
+
+ if (g)
+ MP_CHECKOK(mp_mul(&gx, &v, g));
+
+CLEANUP:
+ while (last >= 0)
+ mp_clear(clean[last--]);
+
+ return res;
+
+} /* end mp_xgcd() */
+
+/* }}} */
+
+mp_size
+mp_trailing_zeros(const mp_int *mp)
+{
+ mp_digit d;
+ mp_size n = 0;
+ unsigned int ix;
+
+ if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
+ return n;
+
+ for (ix = 0; (ix < MP_USED(mp)) && !(d = MP_DIGIT(mp, ix)); ++ix)
+ n += MP_DIGIT_BIT;
+ if (!d)
+ return 0; /* shouldn't happen, but ... */
+#if !defined(MP_USE_UINT_DIGIT)
+ if (!(d & 0xffffffffU)) {
+ d >>= 32;
+ n += 32;
+ }
+#endif
+ if (!(d & 0xffffU)) {
+ d >>= 16;
+ n += 16;
+ }
+ if (!(d & 0xffU)) {
+ d >>= 8;
+ n += 8;
+ }
+ if (!(d & 0xfU)) {
+ d >>= 4;
+ n += 4;
+ }
+ if (!(d & 0x3U)) {
+ d >>= 2;
+ n += 2;
+ }
+ if (!(d & 0x1U)) {
+ d >>= 1;
+ n += 1;
+ }
+#if MP_ARGCHK == 2
+ assert(0 != (d & 1));
+#endif
+ return n;
+}
+
+/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
+** Returns k (positive) or error (negative).
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+*/
+mp_err
+s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
+{
+ mp_err res;
+ mp_err k = 0;
+ mp_int d, f, g;
+
+ ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG);
+
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&f) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_CHECKOK(mp_init(&d));
+ MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
+ MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
+
+ mp_set(c, 1);
+ mp_zero(&d);
+
+ if (mp_cmp_z(&f) == 0) {
+ res = MP_UNDEF;
+ } else
+ for (;;) {
+ int diff_sign;
+ while (mp_iseven(&f)) {
+ mp_size n = mp_trailing_zeros(&f);
+ if (!n) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+ s_mp_div_2d(&f, n);
+ MP_CHECKOK(s_mp_mul_2d(&d, n));
+ k += n;
+ }
+ if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
+ res = k;
+ break;
+ }
+ diff_sign = mp_cmp(&f, &g);
+ if (diff_sign < 0) { /* f < g */
+ s_mp_exch(&f, &g);
+ s_mp_exch(c, &d);
+ } else if (diff_sign == 0) { /* f == g */
+ res = MP_UNDEF; /* a and p are not relatively prime */
+ break;
+ }
+ if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) {
+ MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
+ MP_CHECKOK(mp_sub(c, &d, c)); /* c = c - d */
+ } else {
+ MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
+ MP_CHECKOK(mp_add(c, &d, c)); /* c = c + d */
+ }
+ }
+ if (res >= 0) {
+ if (mp_cmp_mag(c, p) >= 0) {
+ MP_CHECKOK(mp_div(c, p, NULL, c));
+ }
+ if (MP_SIGN(c) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(c, p, c));
+ }
+ res = k;
+ }
+
+CLEANUP:
+ mp_clear(&d);
+ mp_clear(&f);
+ mp_clear(&g);
+ return res;
+}
+
+/* Compute T = (P ** -1) mod MP_RADIX. Also works for 16-bit mp_digits.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+*/
+mp_digit
+s_mp_invmod_radix(mp_digit P)
+{
+ mp_digit T = P;
+ T *= 2 - (P * T);
+ T *= 2 - (P * T);
+ T *= 2 - (P * T);
+ T *= 2 - (P * T);
+#if !defined(MP_USE_UINT_DIGIT)
+ T *= 2 - (P * T);
+ T *= 2 - (P * T);
+#endif
+ return T;
+}
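+
+/* This is Newton's iteration lifted 2-adically: if P*T == 1 (mod 2**k),
+** then P*(T*(2 - P*T)) == 1 - (P*T - 1)**2 == 1 (mod 2**2k), so each
+** line doubles the number of correct low bits. For odd P, P*P == 1
+** (mod 8), so T = P starts with 3 correct bits; four steps give 48,
+** enough for a 32-bit mp_digit, and the two extra steps cover 64 bits.
+*/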
+
+/* Given c, k, and prime p, where a*c == 2**k (mod p),
+** Compute x = (a ** -1) mod p. This is similar to Montgomery reduction.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+*/
+mp_err
+s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
+{
+ int k_orig = k;
+ mp_digit r;
+ mp_size ix;
+ mp_err res;
+
+ if (mp_cmp_z(c) < 0) { /* c < 0 */
+ MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
+ } else {
+ MP_CHECKOK(mp_copy(c, x)); /* x = c */
+ }
+
+ /* make sure x is large enough */
+ ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
+ ix = MP_MAX(ix, MP_USED(x));
+ MP_CHECKOK(s_mp_pad(x, ix));
+
+ r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0));
+
+ for (ix = 0; k > 0; ix++) {
+ int j = MP_MIN(k, MP_DIGIT_BIT);
+ mp_digit v = r * MP_DIGIT(x, ix);
+ if (j < MP_DIGIT_BIT) {
+ v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
+ }
+ s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
+ k -= j;
+ }
+ s_mp_clamp(x);
+ s_mp_div_2d(x, k_orig);
+ res = MP_OKAY;
+
+CLEANUP:
+ return res;
+}
+
+/*
+ Computes the modular inverse using the constant-time algorithm
+ by Bernstein and Yang (https://eprint.iacr.org/2019/266)
+ "Fast constant-time gcd computation and modular inversion"
+ */
+mp_err
+s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+ mp_digit cond = 0;
+ mp_int g, f, v, r, temp;
+ int i, its, delta = 1, last = -1;
+ mp_size top, flen, glen;
+ mp_int *clear[6];
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+ /* Check for invalid inputs. */
+ if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
+ return MP_RANGE;
+
+ if (a == m || mp_iseven(m))
+ return MP_UNDEF;
+
+ MP_CHECKOK(mp_init(&temp));
+ clear[++last] = &temp;
+ MP_CHECKOK(mp_init(&v));
+ clear[++last] = &v;
+ MP_CHECKOK(mp_init(&r));
+ clear[++last] = &r;
+ MP_CHECKOK(mp_init_copy(&g, a));
+ clear[++last] = &g;
+ MP_CHECKOK(mp_init_copy(&f, m));
+ clear[++last] = &f;
+
+ mp_set(&v, 0);
+ mp_set(&r, 1);
+
+ /* Allocate to the size of largest mp_int. */
+ top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+ MP_CHECKOK(s_mp_grow(&f, top));
+ MP_CHECKOK(s_mp_grow(&g, top));
+ MP_CHECKOK(s_mp_grow(&temp, top));
+ MP_CHECKOK(s_mp_grow(&v, top));
+ MP_CHECKOK(s_mp_grow(&r, top));
+
+ /* Upper bound for the total iterations. */
+ flen = mpl_significant_bits(&f);
+ glen = mpl_significant_bits(&g);
+ its = 4 + 3 * ((flen >= glen) ? flen : glen);
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+
+ for (i = 0; i < its; i++) {
+ /* Step 1: conditional swap. */
+ /* Set cond if delta > 0 and g is odd. */
+ cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+ /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
+ delta = (-cond & -delta) | ((cond - 1) & delta);
+ SIGN(&f) ^= cond;
+ SIGN(&v) ^= cond;
+ /* If cond is set swap (f,v) with (g,r). */
+ MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+ MP_CHECKOK(mp_cswap(cond, &v, &r, top));
+
+ /* Step 2: elimination. */
+ /* Update delta */
+ delta++;
+ /* If g is odd replace r with (r+v). */
+ MP_CHECKOK(mp_add(&r, &v, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
+ /* If g is odd, right shift (g+f) else right shift g. */
+ MP_CHECKOK(mp_add(&g, &f, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+ s_mp_div_2(&g);
+ /*
+ If r is even, right shift it.
+ If r is odd, right shift (r+m) which is even because m is odd.
+ We want the result modulo m, so the multiples of m added here vanish.
+ */
+ MP_CHECKOK(mp_add(&r, m, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
+ s_mp_div_2(&r);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* We have the inverse in v, propagate sign from f. */
+ SIGN(&v) ^= SIGN(&f);
+ /* GCD is in f, take the absolute value. */
+ SIGN(&f) = ZPOS;
+
+ /* If gcd != 1, not invertible. */
+ if (mp_cmp_d(&f, 1) != MP_EQ) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+ /* Return inverse modulo m. */
+ MP_CHECKOK(mp_mod(&v, m, c));
+
+CLEANUP:
+ while (last >= 0)
+ mp_clear(clear[last--]);
+ return res;
+}
+
+/* Known good algorithm for computing modular inverse. But slow. */
+mp_err
+mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_int g, x;
+ mp_err res;
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+ return MP_RANGE;
+
+ MP_DIGITS(&g) = 0;
+ MP_DIGITS(&x) = 0;
+ MP_CHECKOK(mp_init(&x));
+ MP_CHECKOK(mp_init(&g));
+
+ MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
+
+ if (mp_cmp_d(&g, 1) != MP_EQ) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+ res = mp_mod(&x, m, c);
+ SIGN(c) = SIGN(a);
+
+CLEANUP:
+ mp_clear(&x);
+ mp_clear(&g);
+
+ return res;
+}
+
+/* modular inverse where modulus is 2**k. */
+/* c = a**-1 mod 2**k */
+mp_err
+s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
+{
+ mp_err res;
+ mp_size ix = k + 4;
+ mp_int t0, t1, val, tmp, two2k;
+
+ static const mp_digit d2 = 2;
+ static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
+
+ if (mp_iseven(a))
+ return MP_UNDEF;
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+ if (k <= MP_DIGIT_BIT) {
+ mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
+ /* propagate the sign from mp_int */
+ i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a);
+ if (k < MP_DIGIT_BIT)
+ i &= ((mp_digit)1 << k) - (mp_digit)1;
+ mp_set(c, i);
+ return MP_OKAY;
+ }
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ MP_DIGITS(&t0) = 0;
+ MP_DIGITS(&t1) = 0;
+ MP_DIGITS(&val) = 0;
+ MP_DIGITS(&tmp) = 0;
+ MP_DIGITS(&two2k) = 0;
+ MP_CHECKOK(mp_init_copy(&val, a));
+ s_mp_mod_2d(&val, k);
+ MP_CHECKOK(mp_init_copy(&t0, &val));
+ MP_CHECKOK(mp_init_copy(&t1, &t0));
+ MP_CHECKOK(mp_init(&tmp));
+ MP_CHECKOK(mp_init(&two2k));
+ MP_CHECKOK(s_mp_2expt(&two2k, k));
+ do {
+ MP_CHECKOK(mp_mul(&val, &t1, &tmp));
+ MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
+ MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
+ s_mp_mod_2d(&t1, k);
+ while (MP_SIGN(&t1) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(&t1, &two2k, &t1));
+ }
+ if (mp_cmp(&t1, &t0) == MP_EQ)
+ break;
+ MP_CHECKOK(mp_copy(&t1, &t0));
+ } while (--ix > 0);
+ if (!ix) {
+ res = MP_UNDEF;
+ } else {
+ mp_exch(c, &t1);
+ }
+
+CLEANUP:
+ mp_clear(&t0);
+ mp_clear(&t1);
+ mp_clear(&val);
+ mp_clear(&tmp);
+ mp_clear(&two2k);
+ return res;
+}
+
+mp_err
+s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+ mp_size k;
+ mp_int oddFactor, evenFactor; /* factors of the modulus */
+ mp_int oddPart, evenPart; /* parts to combine via CRT. */
+ mp_int C2, tmp1, tmp2;
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ /*static const mp_digit d1 = 1; */
+ /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
+
+ if ((res = s_mp_ispow2(m)) >= 0) {
+ k = res;
+ return s_mp_invmod_2d(a, k, c);
+ }
+ MP_DIGITS(&oddFactor) = 0;
+ MP_DIGITS(&evenFactor) = 0;
+ MP_DIGITS(&oddPart) = 0;
+ MP_DIGITS(&evenPart) = 0;
+ MP_DIGITS(&C2) = 0;
+ MP_DIGITS(&tmp1) = 0;
+ MP_DIGITS(&tmp2) = 0;
+
+ MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
+ MP_CHECKOK(mp_init(&evenFactor));
+ MP_CHECKOK(mp_init(&oddPart));
+ MP_CHECKOK(mp_init(&evenPart));
+ MP_CHECKOK(mp_init(&C2));
+ MP_CHECKOK(mp_init(&tmp1));
+ MP_CHECKOK(mp_init(&tmp2));
+
+ k = mp_trailing_zeros(m);
+ s_mp_div_2d(&oddFactor, k);
+ MP_CHECKOK(s_mp_2expt(&evenFactor, k));
+
+ /* compute a**-1 mod oddFactor. */
+ MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
+ /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
+ MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
+
+ /* Use the Chinese Remainder Theorem to compute a**-1 mod m. */
+ /* let m1 = oddFactor, v1 = oddPart,
+ * let m2 = evenFactor, v2 = evenPart.
+ */
+
+ /* Compute C2 = m1**-1 mod m2. */
+ MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
+
+ /* compute u = (v2 - v1)*C2 mod m2 */
+ MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
+ MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
+ s_mp_mod_2d(&tmp2, k);
+ while (MP_SIGN(&tmp2) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
+ }
+
+ /* compute answer = v1 + u*m1 */
+ MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
+ MP_CHECKOK(mp_add(&oddPart, c, c));
+ /* not sure this is necessary, but it's low cost if not. */
+ MP_CHECKOK(mp_mod(c, m, c));
+
+CLEANUP:
+ mp_clear(&oddFactor);
+ mp_clear(&evenFactor);
+ mp_clear(&oddPart);
+ mp_clear(&evenPart);
+ mp_clear(&C2);
+ mp_clear(&tmp1);
+ mp_clear(&tmp2);
+ return res;
+}
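+
+/*
+ The combination step above is the usual two-modulus CRT: with
+ m = m1*m2, gcd(m1, m2) == 1, v1 == a**-1 (mod m1), and
+ v2 == a**-1 (mod m2), the value
+
+     x = v1 + m1 * ((v2 - v1) * (m1**-1 mod m2) mod m2)
+
+ satisfies x == v1 (mod m1) and x == v2 (mod m2), and is therefore
+ a**-1 (mod m).
+ */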
+
+/* {{{ mp_invmod(a, m, c) */
+
+/*
+ mp_invmod(a, m, c)
+
+ Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
+ This is equivalent to the question of whether (a, m) = 1. If not,
+ MP_UNDEF is returned, and there is no inverse.
+ */
+
+mp_err
+mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+ return MP_RANGE;
+
+ if (mp_isodd(m)) {
+ return s_mp_invmod_odd_m(a, m, c);
+ }
+ if (mp_iseven(a))
+ return MP_UNDEF; /* not invertible */
+
+ return s_mp_invmod_even_m(a, m, c);
+
+} /* end mp_invmod() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_print(mp, ofp) */
+
+#if MP_IOFUNC
+/*
+ mp_print(mp, ofp)
+
+ Print a textual representation of the given mp_int on the output
+ stream 'ofp'. Output is generated using the internal radix.
+ */
+
+void
+mp_print(mp_int *mp, FILE *ofp)
+{
+ int ix;
+
+ if (mp == NULL || ofp == NULL)
+ return;
+
+ fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
+
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
+ }
+
+} /* end mp_print() */
+
+#endif /* if MP_IOFUNC */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ More I/O Functions */
+
+/* {{{ mp_read_raw(mp, str, len) */
+
+/*
+ mp_read_raw(mp, str, len)
+
+ Read in a raw value (base 256) into the given mp_int
+ */
+
+mp_err
+mp_read_raw(mp_int *mp, char *str, int len)
+{
+ int ix;
+ mp_err res;
+ unsigned char *ustr = (unsigned char *)str;
+
+ ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+ mp_zero(mp);
+
+ /* Read the value bytes; the first byte holds the sign (see below) */
+ for (ix = 1; ix < len; ix++) {
+ if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
+ return res;
+ if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
+ return res;
+ }
+
+ /* Get sign from first byte */
+ if (ustr[0])
+ SIGN(mp) = NEG;
+ else
+ SIGN(mp) = ZPOS;
+
+ return MP_OKAY;
+
+} /* end mp_read_raw() */
+
+/* }}} */
+
+/* {{{ mp_raw_size(mp) */
+
+int
+mp_raw_size(mp_int *mp)
+{
+ ARGCHK(mp != NULL, 0);
+
+ return (USED(mp) * sizeof(mp_digit)) + 1;
+
+} /* end mp_raw_size() */
+
+/* }}} */
+
+/* {{{ mp_toraw(mp, str) */
+
+mp_err
+mp_toraw(mp_int *mp, char *str)
+{
+ int ix, jx, pos = 1;
+
+ ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+
+ str[0] = (char)SIGN(mp);
+
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+
+ /* Unpack digit bytes, high order first */
+ for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+ str[pos++] = (char)(d >> (jx * CHAR_BIT));
+ }
+ }
+
+ return MP_OKAY;
+
+} /* end mp_toraw() */
+
+/* }}} */
+
+/* {{{ mp_read_radix(mp, str, radix) */
+
+/*
+ mp_read_radix(mp, str, radix)
+
+ Read an integer from the given string, and set mp to the resulting
+ value. The input is presumed to be expressed in the given radix.
+ Leading non-digit characters are ignored, and the function reads
+ until a non-digit character or the end of the string.
+ */
+
+mp_err
+mp_read_radix(mp_int *mp, const char *str, int radix)
+{
+ int ix = 0, val = 0;
+ mp_err res;
+ mp_sign sig = ZPOS;
+
+ ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
+ MP_BADARG);
+
+ mp_zero(mp);
+
+ /* Skip leading non-digit characters until a digit or '-' or '+' */
+ while (str[ix] &&
+ (s_mp_tovalue(str[ix], radix) < 0) &&
+ str[ix] != '-' &&
+ str[ix] != '+') {
+ ++ix;
+ }
+
+ if (str[ix] == '-') {
+ sig = NEG;
+ ++ix;
+ } else if (str[ix] == '+') {
+ sig = ZPOS; /* this is the default anyway... */
+ ++ix;
+ }
+
+ while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
+ if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
+ return res;
+ if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
+ return res;
+ ++ix;
+ }
+
+ if (s_mp_cmp_d(mp, 0) == MP_EQ)
+ SIGN(mp) = ZPOS;
+ else
+ SIGN(mp) = sig;
+
+ return MP_OKAY;
+
+} /* end mp_read_radix() */
+
+mp_err
+mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
+{
+ int radix = default_radix;
+ int cx;
+ mp_sign sig = ZPOS;
+ mp_err res;
+
+ /* Skip leading non-digit characters until a digit or '-' or '+' */
+ while ((cx = *str) != 0 &&
+ (s_mp_tovalue(cx, radix) < 0) &&
+ cx != '-' &&
+ cx != '+') {
+ ++str;
+ }
+
+ if (cx == '-') {
+ sig = NEG;
+ ++str;
+ } else if (cx == '+') {
+ sig = ZPOS; /* this is the default anyway... */
+ ++str;
+ }
+
+ if (str[0] == '0') {
+ if ((str[1] | 0x20) == 'x') {
+ radix = 16;
+ str += 2;
+ } else {
+ radix = 8;
+ str++;
+ }
+ }
+ res = mp_read_radix(a, str, radix);
+ if (res == MP_OKAY) {
+ MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
+ }
+ return res;
+}
+
+/* }}} */
+
+/* {{{ mp_radix_size(mp, radix) */
+
+int
+mp_radix_size(mp_int *mp, int radix)
+{
+ int bits;
+
+ if (!mp || radix < 2 || radix > MAX_RADIX)
+ return 0;
+
+ bits = USED(mp) * DIGIT_BIT - 1;
+
+ return SIGN(mp) + s_mp_outlen(bits, radix);
+
+} /* end mp_radix_size() */
+
+/* }}} */
+
+/* {{{ mp_toradix(mp, str, radix) */
+
+mp_err
+mp_toradix(mp_int *mp, char *str, int radix)
+{
+ int ix, pos = 0;
+
+ ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+ ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
+
+ if (mp_cmp_z(mp) == MP_EQ) {
+ str[0] = '0';
+ str[1] = '\0';
+ } else {
+ mp_err res;
+ mp_int tmp;
+ mp_sign sgn;
+ mp_digit rem, rdx = (mp_digit)radix;
+ char ch;
+
+ if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
+ return res;
+
+ /* Save sign for later, and take absolute value */
+ sgn = SIGN(&tmp);
+ SIGN(&tmp) = ZPOS;
+
+ /* Generate output digits in reverse order */
+ while (mp_cmp_z(&tmp) != 0) {
+ if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
+ mp_clear(&tmp);
+ return res;
+ }
+
+ /* Generate digits, use capital letters */
+ ch = s_mp_todigit(rem, radix, 0);
+
+ str[pos++] = ch;
+ }
+
+ /* Add - sign if original value was negative */
+ if (sgn == NEG)
+ str[pos++] = '-';
+
+ /* Add trailing NUL to end the string */
+ str[pos--] = '\0';
+
+ /* Reverse the digits and sign indicator */
+ ix = 0;
+ while (ix < pos) {
+ char tmpc = str[ix];
+
+ str[ix] = str[pos];
+ str[pos] = tmpc;
+ ++ix;
+ --pos;
+ }
+
+ mp_clear(&tmp);
+ }
+
+ return MP_OKAY;
+
+} /* end mp_toradix() */
+
+/* }}} */
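+
+/*
+ Illustrative round trip (a sketch, not part of this file; error checks
+ and sizing via mp_radix_size elided):
+
+     char buf[8];
+     mp_int n;
+     mp_init(&n);
+     mp_read_radix(&n, "-255", 10);
+     mp_toradix(&n, buf, 16);        buf now holds "-FF"
+     mp_clear(&n);
+ */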
+
+/* {{{ mp_tovalue(ch, r) */
+
+int
+mp_tovalue(char ch, int r)
+{
+ return s_mp_tovalue(ch, r);
+
+} /* end mp_tovalue() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_strerror(ec) */
+
+/*
+ mp_strerror(ec)
+
+ Return a string describing the meaning of error code 'ec'. The
+ string returned is allocated in static memory, so the caller should
+ not attempt to modify or free the memory associated with this
+ string.
+ */
+const char *
+mp_strerror(mp_err ec)
+{
+ int aec = (ec < 0) ? -ec : ec;
+
+ /* Code values are negative, so the senses of these comparisons
+ are accurate */
+ if (ec < MP_LAST_CODE || ec > MP_OKAY) {
+ return mp_err_string[0]; /* unknown error code */
+ } else {
+ return mp_err_string[aec + 1];
+ }
+
+} /* end mp_strerror() */
+
+/* }}} */
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static function definitions (internal use only) */
+
+/* {{{ Memory management */
+
+/* {{{ s_mp_grow(mp, min) */
+
+/* Make sure there are at least 'min' digits allocated to mp */
+mp_err
+s_mp_grow(mp_int *mp, mp_size min)
+{
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ if (min > ALLOC(mp)) {
+ mp_digit *tmp;
+
+ /* Set min to next nearest default precision block size */
+ min = MP_ROUNDUP(min, s_mp_defprec);
+
+ if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
+ return MP_MEM;
+
+ s_mp_copy(DIGITS(mp), tmp, USED(mp));
+
+ s_mp_setz(DIGITS(mp), ALLOC(mp));
+ s_mp_free(DIGITS(mp));
+ DIGITS(mp) = tmp;
+ ALLOC(mp) = min;
+ }
+
+ return MP_OKAY;
+
+} /* end s_mp_grow() */
+
+/* }}} */
+
+/* {{{ s_mp_pad(mp, min) */
+
+/* Make sure the used size of mp is at least 'min', growing if needed */
+mp_err
+s_mp_pad(mp_int *mp, mp_size min)
+{
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ if (min > USED(mp)) {
+ mp_err res;
+
+ /* Make sure there is room to increase precision */
+ if (min > ALLOC(mp)) {
+ if ((res = s_mp_grow(mp, min)) != MP_OKAY)
+ return res;
+ } else {
+ s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
+ }
+
+ /* Increase precision; should already be 0-filled */
+ USED(mp) = min;
+ }
+
+ return MP_OKAY;
+
+} /* end s_mp_pad() */
+
+/* }}} */
+
+/* {{{ s_mp_setz(dp, count) */
+
+/* Set 'count' digits pointed to by dp to be zeroes */
+void
+s_mp_setz(mp_digit *dp, mp_size count)
+{
+ memset(dp, 0, count * sizeof(mp_digit));
+} /* end s_mp_setz() */
+
+/* }}} */
+
+/* {{{ s_mp_copy(sp, dp, count) */
+
+/* Copy 'count' digits from sp to dp */
+void
+s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
+{
+ memcpy(dp, sp, count * sizeof(mp_digit));
+} /* end s_mp_copy() */
+
+/* }}} */
+
+/* {{{ s_mp_alloc(nb, ni) */
+
+/* Allocate ni records of nb bytes each, and return a pointer to that */
+void *
+s_mp_alloc(size_t nb, size_t ni)
+{
+ return calloc(nb, ni);
+
+} /* end s_mp_alloc() */
+
+/* }}} */
+
+/* {{{ s_mp_free(ptr) */
+
+/* Free the memory pointed to by ptr */
+void
+s_mp_free(void *ptr)
+{
+ if (ptr) {
+ free(ptr);
+ }
+} /* end s_mp_free() */
+
+/* }}} */
+
+/* {{{ s_mp_clamp(mp) */
+
+/* Remove leading zeroes from the given value */
+void
+s_mp_clamp(mp_int *mp)
+{
+ mp_size used = MP_USED(mp);
+ while (used > 1 && DIGIT(mp, used - 1) == 0)
+ --used;
+ MP_USED(mp) = used;
+ if (used == 1 && DIGIT(mp, 0) == 0)
+ MP_SIGN(mp) = ZPOS;
+} /* end s_mp_clamp() */
+
+/* }}} */
+
+/* {{{ s_mp_exch(a, b) */
+
+/* Exchange the data for a and b; (b, a) = (a, b) */
+void
+s_mp_exch(mp_int *a, mp_int *b)
+{
+ mp_int tmp;
+ if (!a || !b) {
+ return;
+ }
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+
+} /* end s_mp_exch() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Arithmetic helpers */
+
+/* {{{ s_mp_lshd(mp, p) */
+
+/*
+ Shift mp leftward by p digits, growing if needed, and zero-filling
+ the in-shifted digits at the right end. This is a convenient
+ alternative to multiplication by powers of the radix
+ */
+
+mp_err
+s_mp_lshd(mp_int *mp, mp_size p)
+{
+ mp_err res;
+ unsigned int ix;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ if (p == 0)
+ return MP_OKAY;
+
+ if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
+ return MP_OKAY;
+
+ if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
+ return res;
+
+ /* Shift all the significant figures over as needed */
+ for (ix = USED(mp) - p; ix-- > 0;) {
+ DIGIT(mp, ix + p) = DIGIT(mp, ix);
+ }
+
+ /* Fill the bottom digits with zeroes */
+ for (ix = 0; (mp_size)ix < p; ix++)
+ DIGIT(mp, ix) = 0;
+
+ return MP_OKAY;
+
+} /* end s_mp_lshd() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2d(mp, d) */
+
+/*
+ Multiply the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise shift of the value.
+ */
+mp_err
+s_mp_mul_2d(mp_int *mp, mp_digit d)
+{
+ mp_err res;
+ mp_digit dshift, rshift, mask, x, prev = 0;
+ mp_digit *pa = NULL;
+ int i;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ dshift = d / MP_DIGIT_BIT;
+ d %= MP_DIGIT_BIT;
+ /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
+ /* mod and corresponding mask logic avoid that when d = 0 */
+ rshift = MP_DIGIT_BIT - d;
+ rshift %= MP_DIGIT_BIT;
+ /* mask = (2**d - 1) * 2**(w-d) mod 2**w */
+ mask = (DIGIT_MAX << rshift) + 1;
+ mask &= DIGIT_MAX - 1;
+ /* bits to be shifted out of the top word */
+ x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
+
+ if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0))))
+ return res;
+
+ if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
+ return res;
+
+ pa = MP_DIGITS(mp) + dshift;
+
+ for (i = MP_USED(mp) - dshift; i > 0; i--) {
+ x = *pa;
+ *pa++ = (x << d) | prev;
+ prev = (x & mask) >> rshift;
+ }
+
+ s_mp_clamp(mp);
+ return MP_OKAY;
+} /* end s_mp_mul_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_rshd(mp, p) */
+
+/*
+ Shift mp rightward by p digits. Maintains the invariant that
+ digits above the precision are all zero. Digits shifted off the
+ end are lost. Cannot fail.
+ */
+
+void
+s_mp_rshd(mp_int *mp, mp_size p)
+{
+ mp_size ix;
+ mp_digit *src, *dst;
+
+ if (p == 0)
+ return;
+
+ /* Shortcut when all digits are to be shifted off */
+ if (p >= USED(mp)) {
+ s_mp_setz(DIGITS(mp), ALLOC(mp));
+ USED(mp) = 1;
+ SIGN(mp) = ZPOS;
+ return;
+ }
+
+ /* Shift all the significant figures over as needed */
+ dst = MP_DIGITS(mp);
+ src = dst + p;
+ for (ix = USED(mp) - p; ix > 0; ix--)
+ *dst++ = *src++;
+
+ MP_USED(mp) -= p;
+ /* Fill the top digits with zeroes */
+ while (p-- > 0)
+ *dst++ = 0;
+
+} /* end s_mp_rshd() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2(mp) */
+
+/* Divide by two -- take advantage of radix properties to do it fast */
+void
+s_mp_div_2(mp_int *mp)
+{
+ s_mp_div_2d(mp, 1);
+
+} /* end s_mp_div_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2(mp) */
+
+mp_err
+s_mp_mul_2(mp_int *mp)
+{
+ mp_digit *pd;
+ unsigned int ix, used;
+ mp_digit kin = 0;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ /* Shift digits leftward by 1 bit */
+ used = MP_USED(mp);
+ pd = MP_DIGITS(mp);
+ for (ix = 0; ix < used; ix++) {
+ mp_digit d = *pd;
+ *pd++ = (d << 1) | kin;
+ kin = (d >> (DIGIT_BIT - 1));
+ }
+
+ /* Deal with rollover from last digit */
+ if (kin) {
+ if (ix >= ALLOC(mp)) {
+ mp_err res;
+ if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
+ return res;
+ }
+
+ DIGIT(mp, ix) = kin;
+ USED(mp) += 1;
+ }
+
+ return MP_OKAY;
+
+} /* end s_mp_mul_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mod_2d(mp, d) */
+
+/*
+ Remainder the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise AND of the value, and does not require the full
+ division code
+ */
+void
+s_mp_mod_2d(mp_int *mp, mp_digit d)
+{
+ mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
+ mp_size ix;
+ mp_digit dmask;
+
+ if (ndig >= USED(mp))
+ return;
+
+ /* Flush all the bits above 2^d in its digit */
+ dmask = ((mp_digit)1 << nbit) - 1;
+ DIGIT(mp, ndig) &= dmask;
+
+ /* Flush all digits above the one with 2^d in it */
+ for (ix = ndig + 1; ix < USED(mp); ix++)
+ DIGIT(mp, ix) = 0;
+
+ s_mp_clamp(mp);
+
+} /* end s_mp_mod_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2d(mp, d) */
+
+/*
+ Divide the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise shift of the value, and does not require the
+ full division code (used in Barrett reduction, see below)
+ */
+void
+s_mp_div_2d(mp_int *mp, mp_digit d)
+{
+ int ix;
+ mp_digit save, next, mask, lshift;
+
+ s_mp_rshd(mp, d / DIGIT_BIT);
+ d %= DIGIT_BIT;
+ /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */
+ /* mod and corresponding mask logic avoid that when d = 0 */
+ lshift = DIGIT_BIT - d;
+ lshift %= DIGIT_BIT;
+ mask = ((mp_digit)1 << d) - 1;
+ save = 0;
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ next = DIGIT(mp, ix) & mask;
+ DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d);
+ save = next;
+ }
+ s_mp_clamp(mp);
+
+} /* end s_mp_div_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_norm(a, b, *pd) */
+
+/*
+ s_mp_norm(a, b, *pd)
+
+ Normalize a and b for division, where b is the divisor. In order
+ that we might make good guesses for quotient digits, we want the
+ leading digit of b to be at least half the radix, which we
+ accomplish by multiplying a and b by a power of 2. The exponent
+ (shift count) is placed in *pd, so that the remainder can be shifted
+ back at the end of the division process.
+ */
+
+mp_err
+s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
+{
+ mp_digit d;
+ mp_digit mask;
+ mp_digit b_msd;
+ mp_err res = MP_OKAY;
+
+ ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG);
+
+ d = 0;
+ mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
+ b_msd = DIGIT(b, USED(b) - 1);
+ while (!(b_msd & mask)) {
+ b_msd <<= 1;
+ ++d;
+ }
+
+ if (d) {
+ MP_CHECKOK(s_mp_mul_2d(a, d));
+ MP_CHECKOK(s_mp_mul_2d(b, d));
+ }
+
+ *pd = d;
+CLEANUP:
+ return res;
+
+} /* end s_mp_norm() */
+
+/* }}} */
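+
+/*
+ Example with hypothetical 8-bit digits: b = 0x0305 has leading digit
+ 0x03, so s_mp_norm shifts both operands left by d = 6 bits, giving
+ b = 0xC140 whose leading digit has its high bit set. With the divisor
+ normalized this way, each quotient-digit guess in s_mp_div is too
+ large by at most two.
+ */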
+
+/* }}} */
+
+/* {{{ Primitive digit arithmetic */
+
+/* {{{ s_mp_add_d(mp, d) */
+
+/* Add d to |mp| in place */
+mp_err
+s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w, k = 0;
+ mp_size ix = 1;
+
+ w = (mp_word)DIGIT(mp, 0) + d;
+ DIGIT(mp, 0) = ACCUM(w);
+ k = CARRYOUT(w);
+
+ while (ix < USED(mp) && k) {
+ w = (mp_word)DIGIT(mp, ix) + k;
+ DIGIT(mp, ix) = ACCUM(w);
+ k = CARRYOUT(w);
+ ++ix;
+ }
+
+ if (k != 0) {
+ mp_err res;
+
+ if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(mp, ix) = (mp_digit)k;
+ }
+
+ return MP_OKAY;
+#else
+ mp_digit *pmp = MP_DIGITS(mp);
+ mp_digit sum, mp_i, carry = 0;
+ mp_err res = MP_OKAY;
+ int used = (int)MP_USED(mp);
+
+ mp_i = *pmp;
+ *pmp++ = sum = d + mp_i;
+ carry = (sum < d);
+ while (carry && --used > 0) {
+ mp_i = *pmp;
+ *pmp++ = sum = carry + mp_i;
+ carry = !sum;
+ }
+ if (carry && !used) {
+ /* mp is growing */
+ used = MP_USED(mp);
+ MP_CHECKOK(s_mp_pad(mp, used + 1));
+ MP_DIGIT(mp, used) = carry;
+ }
+CLEANUP:
+ return res;
+#endif
+} /* end s_mp_add_d() */
+
+/* }}} */
+
+/* {{{ s_mp_sub_d(mp, d) */
+
+/* Subtract d from |mp| in place, assumes |mp| > d */
+mp_err
+s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_word w, b = 0;
+ mp_size ix = 1;
+
+ /* Compute initial subtraction */
+ w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
+ b = CARRYOUT(w) ? 0 : 1;
+ DIGIT(mp, 0) = ACCUM(w);
+
+ /* Propagate borrows leftward */
+ while (b && ix < USED(mp)) {
+ w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
+ b = CARRYOUT(w) ? 0 : 1;
+ DIGIT(mp, ix) = ACCUM(w);
+ ++ix;
+ }
+
+ /* Remove leading zeroes */
+ s_mp_clamp(mp);
+
+ /* If we have a borrow out, it's a violation of the input invariant */
+ if (b)
+ return MP_RANGE;
+ else
+ return MP_OKAY;
+#else
+ mp_digit *pmp = MP_DIGITS(mp);
+ mp_digit mp_i, diff, borrow;
+ mp_size used = MP_USED(mp);
+
+ mp_i = *pmp;
+ *pmp++ = diff = mp_i - d;
+ borrow = (diff > mp_i);
+ while (borrow && --used) {
+ mp_i = *pmp;
+ *pmp++ = diff = mp_i - borrow;
+ borrow = (diff > mp_i);
+ }
+ s_mp_clamp(mp);
+ return (borrow && !used) ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub_d() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_d(a, d) */
+
+/* Compute a = a * d, single digit multiplication */
+mp_err
+s_mp_mul_d(mp_int *a, mp_digit d)
+{
+ mp_err res;
+ mp_size used;
+ int pow;
+
+ if (!d) {
+ mp_zero(a);
+ return MP_OKAY;
+ }
+ if (d == 1)
+ return MP_OKAY;
+ if (0 <= (pow = s_mp_ispow2d(d))) {
+ return s_mp_mul_2d(a, (mp_digit)pow);
+ }
+
+ used = MP_USED(a);
+ MP_CHECKOK(s_mp_pad(a, used + 1));
+
+ s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
+
+ s_mp_clamp(a);
+
+CLEANUP:
+ return res;
+
+} /* end s_mp_mul_d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_d(mp, d, r) */
+
+/*
+ s_mp_div_d(mp, d, r)
+
+ Compute the quotient mp = mp / d and remainder r = mp mod d, for a
+ single digit d. If r is null, the remainder will be discarded.
+ */
+
+mp_err
+s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+ mp_word w = 0, q;
+#else
+ mp_digit w = 0, q;
+#endif
+ int ix;
+ mp_err res;
+ mp_int quot;
+ mp_int rem;
+
+ if (d == 0)
+ return MP_RANGE;
+ if (d == 1) {
+ if (r)
+ *r = 0;
+ return MP_OKAY;
+ }
+ /* could check for power of 2 here, but mp_div_d does that. */
+ if (MP_USED(mp) == 1) {
+ mp_digit n = MP_DIGIT(mp, 0);
+ mp_digit remdig;
+
+ q = n / d;
+ remdig = n % d;
+ MP_DIGIT(mp, 0) = q;
+ if (r) {
+ *r = remdig;
+ }
+ return MP_OKAY;
+ }
+
+ MP_DIGITS(&rem) = 0;
+ MP_DIGITS(&quot) = 0;
+ /* Make room for the quotient */
+ MP_CHECKOK(mp_init_size(&quot, USED(mp)));
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ w = (w << DIGIT_BIT) | DIGIT(mp, ix);
+
+ if (w >= d) {
+ q = w / d;
+ w = w % d;
+ } else {
+ q = 0;
+ }
+
+ s_mp_lshd(&quot, 1);
+ DIGIT(&quot, 0) = (mp_digit)q;
+ }
+#else
+ {
+ mp_digit p;
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+ mp_digit norm;
+#endif
+
+ MP_CHECKOK(mp_init_copy(&rem, mp));
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+ MP_DIGIT(&quot, 0) = d;
+ MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
+ if (norm)
+ d <<= norm;
+ MP_DIGIT(&quot, 0) = 0;
+#endif
+
+ p = 0;
+ for (ix = USED(&rem) - 1; ix >= 0; ix--) {
+ w = DIGIT(&rem, ix);
+
+ if (p) {
+ MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
+ } else if (w >= d) {
+ q = w / d;
+ w = w % d;
+ } else {
+ q = 0;
+ }
+
+ MP_CHECKOK(s_mp_lshd(&quot, 1));
+ DIGIT(&quot, 0) = q;
+ p = w;
+ }
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+ if (norm)
+ w >>= norm;
+#endif
+ }
+#endif
+
+ /* Deliver the remainder, if desired */
+ if (r) {
+ *r = (mp_digit)w;
+ }
+
+ s_mp_clamp(&quot);
+ mp_exch(&quot, mp);
+CLEANUP:
+ mp_clear(&quot);
+ mp_clear(&rem);
+
+ return res;
+} /* end s_mp_div_d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive full arithmetic */
+
+/* {{{ s_mp_add(a, b) */
+
+/* Compute a = |a| + |b| */
+mp_err
+s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w = 0;
+#else
+ mp_digit d, sum, carry = 0;
+#endif
+ mp_digit *pa, *pb;
+ mp_size ix;
+ mp_size used;
+ mp_err res;
+
+ /* Make sure a has enough precision for the output value */
+ if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ padding step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ used = MP_USED(b);
+ for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa + *pb++;
+ *pa++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ d = *pa;
+ sum = d + *pb++;
+ d = (sum < d); /* detect overflow */
+ *pa++ = sum += carry;
+ carry = d + (sum < carry); /* detect overflow */
+#endif
+ }
+
+ /* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+ used = MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ while (w && ix < used) {
+ w = w + *pa;
+ *pa++ = ACCUM(w);
+ w = CARRYOUT(w);
+ ++ix;
+ }
+#else
+ while (carry && ix < used) {
+ sum = carry + *pa;
+ *pa++ = sum;
+ carry = !sum;
+ ++ix;
+ }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (w) {
+ if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, ix) = (mp_digit)w;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, used) = carry;
+ }
+#endif
+
+ return MP_OKAY;
+} /* end s_mp_add() */
+
+/* }}} */
+
+/* Compute c = |a| + |b| */ /* magnitude addition */
+mp_err
+s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w = 0;
+#else
+ mp_digit sum, carry = 0, d;
+#endif
+ mp_size ix;
+ mp_size used;
+ mp_err res;
+
+ MP_SIGN(c) = MP_SIGN(a);
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = a;
+ a = b;
+ b = xch;
+ }
+
+ /* Make sure a has enough precision for the output value */
+ if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ exchange step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ pc = MP_DIGITS(c);
+ used = MP_USED(b);
+ for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa++ + *pb++;
+ *pc++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ d = *pa++;
+ sum = d + *pb++;
+ d = (sum < d); /* detect overflow */
+ *pc++ = sum += carry;
+ carry = d + (sum < carry); /* detect overflow */
+#endif
+ }
+
+ /* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+ for (used = MP_USED(a); ix < used; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa++;
+ *pc++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ *pc++ = sum = carry + *pa++;
+ carry = (sum < carry);
+#endif
+ }
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (w) {
+ if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(c, used) = (mp_digit)w;
+ ++used;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(c, used) = carry;
+ ++used;
+ }
+#endif
+ MP_USED(c) = used;
+ return MP_OKAY;
+}
+/* {{{ s_mp_add_offset(a, b, offset) */
+
+/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
+mp_err
+s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w, k = 0;
+#else
+ mp_digit d, sum, carry = 0;
+#endif
+ mp_size ib;
+ mp_size ia;
+ mp_size lim;
+ mp_err res;
+
+ /* Make sure a has enough precision for the output value */
+ lim = MP_USED(b) + offset;
+ if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ padding step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ lim = USED(b);
+ for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
+ DIGIT(a, ia) = ACCUM(w);
+ k = CARRYOUT(w);
+#else
+ d = MP_DIGIT(a, ia);
+ sum = d + MP_DIGIT(b, ib);
+ d = (sum < d);
+ MP_DIGIT(a, ia) = sum += carry;
+ carry = d + (sum < carry);
+#endif
+ }
+
+/* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ for (lim = MP_USED(a); k && (ia < lim); ++ia) {
+ w = (mp_word)DIGIT(a, ia) + k;
+ DIGIT(a, ia) = ACCUM(w);
+ k = CARRYOUT(w);
+ }
+#else
+ for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
+ d = MP_DIGIT(a, ia);
+ MP_DIGIT(a, ia) = sum = d + carry;
+ carry = (sum < d);
+ }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (k) {
+ if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, ia) = (mp_digit)k;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, lim) = carry;
+ }
+#endif
+ s_mp_clamp(a);
+
+ return MP_OKAY;
+
+} /* end s_mp_add_offset() */
+
+/* }}} */
+
+/* {{{ s_mp_sub(a, b) */
+
+/* Compute a = |a| - |b|, assumes |a| >= |b| */
+mp_err
+s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
+{
+ mp_digit *pa, *pb, *limit;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_sword w = 0;
+#else
+ mp_digit d, diff, borrow = 0;
+#endif
+
+ /*
+ Subtract and propagate borrow. Up to the precision of b, this
+ accounts for the digits of b; after that, we just make sure the
+ carries get to the right place. This saves having to pad b out to
+ the precision of a just to make the loops work right...
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ limit = pb + MP_USED(b);
+ while (pb < limit) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa - *pb++;
+ *pa++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa;
+ diff = d - *pb++;
+ d = (diff > d); /* detect borrow */
+ if (borrow && --diff == MP_DIGIT_MAX)
+ ++d;
+ *pa++ = diff;
+ borrow = d;
+#endif
+ }
+ limit = MP_DIGITS(a) + MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ while (w && pa < limit) {
+ w = w + *pa;
+ *pa++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+ }
+#else
+ while (borrow && pa < limit) {
+ d = *pa;
+ *pa++ = diff = d - borrow;
+ borrow = (diff > d);
+ }
+#endif
+
+ /* Clobber any leading zeroes we created */
+ s_mp_clamp(a);
+
+/*
+ If there was a borrow out, then |b| > |a| in violation
+ of our input invariant. We've already done the work,
+ but we'll at least complain about it...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ return w ? MP_RANGE : MP_OKAY;
+#else
+ return borrow ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub() */
+
+/* }}} */
+
+/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
+mp_err
+s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_sword w = 0;
+#else
+ mp_digit d, diff, borrow = 0;
+#endif
+ int ix, limit;
+ mp_err res;
+
+ MP_SIGN(c) = MP_SIGN(a);
+
+ /* Make sure a has enough precision for the output value */
+ if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+ return res;
+
+ /*
+ Subtract and propagate borrow. Up to the precision of b, this
+ accounts for the digits of b; after that, we just make sure the
+ carries get to the right place. This saves having to pad b out to
+ the precision of a just to make the loops work right...
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ pc = MP_DIGITS(c);
+ limit = MP_USED(b);
+ for (ix = 0; ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa++ - *pb++;
+ *pc++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa++;
+ diff = d - *pb++;
+ d = (diff > d);
+ if (borrow && --diff == MP_DIGIT_MAX)
+ ++d;
+ *pc++ = diff;
+ borrow = d;
+#endif
+ }
+ for (limit = MP_USED(a); ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa++;
+ *pc++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa++;
+ *pc++ = diff = d - borrow;
+ borrow = (diff > d);
+#endif
+ }
+
+ /* Clobber any leading zeroes we created */
+ MP_USED(c) = ix;
+ s_mp_clamp(c);
+
+/*
+ If there was a borrow out, then |b| > |a| in violation
+ of our input invariant. We've already done the work,
+ but we'll at least complain about it...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ return w ? MP_RANGE : MP_OKAY;
+#else
+ return borrow ? MP_RANGE : MP_OKAY;
+#endif
+}
+/* {{{ s_mp_mul(a, b) */
+
+/* Compute a = |a| * |b| */
+mp_err
+s_mp_mul(mp_int *a, const mp_int *b)
+{
+ return mp_mul(a, b, a);
+} /* end s_mp_mul() */
+
+/* }}} */
+
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ unsigned long long product = (unsigned long long)a * b; \
+ Plo = (mp_digit)product; \
+ Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
+ }
+#else
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ mp_digit a0b1, a1b0; \
+ Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
+ Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
+ a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
+ a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
+ a1b0 += a0b1; \
+ Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
+ Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \
+ a1b0 <<= MP_HALF_DIGIT_BIT; \
+ Plo += a1b0; \
+ Phi += MP_CT_LTU(Plo, a1b0); \
+ }
+#endif
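+
+/*
+  Worked example for the half-digit path above (illustrative, assuming
+  an 8-bit mp_digit, so MP_HALF_DIGIT_BIT = 4): for a = 0xAB, b = 0xCD,
+  Plo starts as 0xB*0xD = 0x8F and Phi as 0xA*0xC = 0x78.  The cross
+  terms a0b1 = 0x84 and a1b0 = 0x82 sum to 0x106, wrapping to 0x06 with
+  a carry that adds 0x10 to Phi.  Folding the shifted middle term into
+  Plo gives Phi:Plo = 0x88:0xEF, and indeed 0xAB * 0xCD = 0x88EF.
+ */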
+
+/* Constant-time version of s_mpv_mul_d_add_prop.
+ * Presently, this is only used by the constant-time Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c, mp_size c_len)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0;
+
+ c_len -= a_len;
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+
+ /* propagate the carry to the end, even if carry is zero */
+ while (c_len--) {
+ mp_word w = (mp_word)*c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+#else
+ mp_digit carry = 0;
+ c_len -= a_len;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ a1b1 += MP_CT_LTU(a0b0, carry);
+ a0b0 += a_i = *c;
+ a1b1 += MP_CT_LTU(a0b0, a_i);
+
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ /* propagate the carry to the end, even if carry is zero */
+ while (c_len--) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = MP_CT_LTU(carry, c_i);
+ }
+#endif
+}
+
+#if !defined(MP_ASSEMBLY_MULTIPLY)
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0;
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+ *c = d;
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ a1b1 += MP_CT_LTU(a0b0, carry);
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ *c = carry;
+#endif
+}
+
+/* c += a * b */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0;
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+ *c = d;
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ a1b1 += MP_CT_LTU(a0b0, carry);
+ a0b0 += a_i = *c;
+ a1b1 += MP_CT_LTU(a0b0, a_i);
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ *c = carry;
+#endif
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0;
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+
+ while (d) {
+ mp_word w = (mp_word)*c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+
+ a0b0 += a_i = *c;
+ if (a0b0 < a_i)
+ ++a1b1;
+
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ while (carry) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i;
+ }
+#endif
+}
+#endif
+
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ unsigned long long square = (unsigned long long)a * a; \
+ Plo = (mp_digit)square; \
+ Phi = (mp_digit)(square >> MP_DIGIT_BIT); \
+ }
+#else
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ mp_digit Pmid; \
+ Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \
+ Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
+ Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
+ Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \
+ Pmid <<= (MP_HALF_DIGIT_BIT + 1); \
+ Plo += Pmid; \
+ if (Plo < Pmid) \
+ ++Phi; \
+ }
+#endif
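+
+/*
+  Worked example for the half-digit path above (illustrative, assuming
+  an 8-bit mp_digit): for a = 0xAB, Plo = 0xB*0xB = 0x79, Phi = 0xA*0xA
+  = 0x64, Pmid = 0xB*0xA = 0x6E.  The middle term occurs twice in the
+  square, which is why Pmid is shifted left by MP_HALF_DIGIT_BIT + 1
+  and feeds Phi via a right shift of MP_HALF_DIGIT_BIT - 1: Phi becomes
+  0x64 + 0x0D = 0x71, Plo wraps to 0x39 carrying one more into Phi, so
+  Phi:Plo = 0x72:0x39, and indeed 0xAB * 0xAB = 0x7239.
+ */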
+
+#if !defined(MP_ASSEMBLY_SQUARE)
+/* Add the squares of the digits of a to the digits of the accumulator ps. */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_word w;
+ mp_digit d;
+ mp_size ix;
+
+ w = 0;
+#define ADD_SQUARE(n) \
+ d = pa[n]; \
+ w += (d * (mp_word)d) + ps[2 * n]; \
+ ps[2 * n] = ACCUM(w); \
+ w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
+ ps[2 * n + 1] = ACCUM(w); \
+ w = (w >> DIGIT_BIT)
+
+ for (ix = a_len; ix >= 4; ix -= 4) {
+ ADD_SQUARE(0);
+ ADD_SQUARE(1);
+ ADD_SQUARE(2);
+ ADD_SQUARE(3);
+ pa += 4;
+ ps += 8;
+ }
+ if (ix) {
+ ps += 2 * ix;
+ pa += ix;
+ switch (ix) {
+ case 3:
+ ADD_SQUARE(-3); /* FALLTHRU */
+ case 2:
+ ADD_SQUARE(-2); /* FALLTHRU */
+ case 1:
+ ADD_SQUARE(-1); /* FALLTHRU */
+ case 0:
+ break;
+ }
+ }
+ while (w) {
+ w += *ps;
+ *ps++ = ACCUM(w);
+ w = (w >> DIGIT_BIT);
+ }
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *pa++;
+ mp_digit a0a0, a1a1;
+
+ MP_SQR_D(a_i, a1a1, a0a0);
+
+ /* here a1a1 and a0a0 constitute a_i ** 2 */
+ a0a0 += carry;
+ if (a0a0 < carry)
+ ++a1a1;
+
+ /* now add to ps */
+ a0a0 += a_i = *ps;
+ if (a0a0 < a_i)
+ ++a1a1;
+ *ps++ = a0a0;
+ a1a1 += a_i = *ps;
+ carry = (a1a1 < a_i);
+ *ps++ = a1a1;
+ }
+ while (carry) {
+ mp_digit s_i = *ps;
+ carry += s_i;
+ *ps++ = carry;
+ carry = carry < s_i;
+ }
+#endif
+}
+#endif
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+/*
+** Divide the two-digit value (Nhi,Nlo) by a one-digit divisor, which
+** must be normalized so that its high bit is 1. This code is from NSPR.
+*/
+mp_err
+s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ mp_digit d1, d0, q1, q0;
+ mp_digit r1, r0, m;
+
+ d1 = divisor >> MP_HALF_DIGIT_BIT;
+ d0 = divisor & MP_HALF_DIGIT_MAX;
+ r1 = Nhi % d1;
+ q1 = Nhi / d1;
+ m = q1 * d0;
+ r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
+ if (r1 < m) {
+ q1--, r1 += divisor;
+ if (r1 >= divisor && r1 < m) {
+ q1--, r1 += divisor;
+ }
+ }
+ r1 -= m;
+ r0 = r1 % d1;
+ q0 = r1 / d1;
+ m = q0 * d0;
+ r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
+ if (r0 < m) {
+ q0--, r0 += divisor;
+ if (r0 >= divisor && r0 < m) {
+ q0--, r0 += divisor;
+ }
+ }
+ if (qp)
+ *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
+ if (rp)
+ *rp = r0 - m;
+ return MP_OKAY;
+}
+#endif
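+
+/*
+  Worked example (illustrative, assuming an 8-bit mp_digit): dividing
+  (Nhi, Nlo) = (0x12, 0x34) by the normalized divisor 0xAB splits the
+  divisor into halves d1 = 0xA, d0 = 0xB.  The high quotient half is
+  estimated as q1 = 0x12/0xA = 1 and survives correction; the low half
+  is estimated as q0 = 0xC and corrected once down to 0xB, giving the
+  quotient 0x1B and remainder 0x2B, matching 0x1234 = 0x1B*0xAB + 0x2B.
+ */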
+
+#if MP_SQUARE
+/* {{{ s_mp_sqr(a) */
+
+mp_err
+s_mp_sqr(mp_int *a)
+{
+ mp_err res;
+ mp_int tmp;
+
+ if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
+ return res;
+ res = mp_sqr(a, &tmp);
+ if (res == MP_OKAY) {
+ s_mp_exch(&tmp, a);
+ }
+ mp_clear(&tmp);
+ return res;
+}
+
+/* }}} */
+#endif
+
+/* {{{ s_mp_div(a, b) */
+
+/*
+  s_mp_div(rem, div, quot)
+
+  Compute the quotient quot = rem / div and the remainder rem = rem mod div.
+  Assumes div is nonzero and quot is passed in as zero.
+ */
+
+mp_err
+s_mp_div(mp_int *rem, /* i: dividend, o: remainder */
+ mp_int *div, /* i: divisor */
+ mp_int *quot) /* i: 0; o: quotient */
+{
+ mp_int part, t;
+ mp_digit q_msd;
+ mp_err res;
+ mp_digit d;
+ mp_digit div_msd;
+ int ix;
+
+ if (mp_cmp_z(div) == 0)
+ return MP_RANGE;
+
+ DIGITS(&t) = 0;
+ /* Shortcut if divisor is power of two */
+ if ((ix = s_mp_ispow2(div)) >= 0) {
+ MP_CHECKOK(mp_copy(rem, quot));
+ s_mp_div_2d(quot, (mp_digit)ix);
+ s_mp_mod_2d(rem, (mp_digit)ix);
+
+ return MP_OKAY;
+ }
+
+ MP_SIGN(rem) = ZPOS;
+ MP_SIGN(div) = ZPOS;
+ MP_SIGN(&part) = ZPOS;
+
+ /* A working temporary for division */
+ MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
+
+ /* Normalize to optimize guessing */
+ MP_CHECKOK(s_mp_norm(rem, div, &d));
+
+ /* Perform the division itself...woo! */
+ MP_USED(quot) = MP_ALLOC(quot);
+
+ /* Find a partial substring of rem which is at least div */
+ /* If we didn't find one, we're finished dividing */
+ while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
+ int i;
+ int unusedRem;
+ int partExtended = 0; /* set to true if we need to extend part */
+
+ unusedRem = MP_USED(rem) - MP_USED(div);
+ MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
+ MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
+ MP_USED(&part) = MP_USED(div);
+
+ /* We have now truncated the part of the remainder to the same length as
+ * the divisor. If part is smaller than div, extend part by one digit. */
+ if (s_mp_cmp(&part, div) < 0) {
+ --unusedRem;
+#if MP_ARGCHK == 2
+ assert(unusedRem >= 0);
+#endif
+ --MP_DIGITS(&part);
+ ++MP_USED(&part);
+ ++MP_ALLOC(&part);
+ partExtended = 1;
+ }
+
+ /* Compute a guess for the next quotient digit */
+ q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
+ div_msd = MP_DIGIT(div, MP_USED(div) - 1);
+ if (!partExtended) {
+ /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
+ * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
+ * we didn't extend part, q_msd >= div_msd. Therefore we know that
+ * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
+ * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
+ q_msd = 1;
+ } else {
+ if (q_msd == div_msd) {
+ q_msd = MP_DIGIT_MAX;
+ } else {
+ mp_digit r;
+ MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
+ div_msd, &q_msd, &r));
+ }
+ }
+#if MP_ARGCHK == 2
+ assert(q_msd > 0); /* This case should never occur any more. */
+#endif
+ if (q_msd <= 0)
+ break;
+
+ /* See what that multiplies out to */
+ mp_copy(div, &t);
+ MP_CHECKOK(s_mp_mul_d(&t, q_msd));
+
+ /*
+ If it's too big, back it off. We should not have to do this
+ more than once, or, in rare cases, twice. Knuth describes a
+ method by which this could be reduced to a maximum of once, but
+ I didn't implement that here.
+ When using s_mpv_div_2dx1d, we may have to do this 3 times.
+ */
+ for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
+ --q_msd;
+ MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
+ }
+ if (i < 0) {
+ res = MP_RANGE;
+ goto CLEANUP;
+ }
+
+ /* At this point, q_msd should be the right next digit */
+ MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
+ s_mp_clamp(rem);
+
+ /*
+ Include the digit in the quotient. We allocated enough memory
+ for any quotient we could ever possibly get, so we should not
+ have to check for failures here
+ */
+ MP_DIGIT(quot, unusedRem) = q_msd;
+ }
+
+ /* Denormalize remainder */
+ if (d) {
+ s_mp_div_2d(rem, d);
+ }
+
+ s_mp_clamp(quot);
+
+CLEANUP:
+ mp_clear(&t);
+
+ return res;
+
+} /* end s_mp_div() */
+
+/* }}} */
+
+/* {{{ s_mp_2expt(a, k) */
+
+mp_err
+s_mp_2expt(mp_int *a, mp_digit k)
+{
+ mp_err res;
+ mp_size dig, bit;
+
+ dig = k / DIGIT_BIT;
+ bit = k % DIGIT_BIT;
+
+ mp_zero(a);
+ if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, dig) |= ((mp_digit)1 << bit);
+
+ return MP_OKAY;
+
+} /* end s_mp_2expt() */
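+
+/*
+  Example (illustrative, assuming a 32-bit mp_digit): s_mp_2expt(a, 70)
+  computes dig = 2 and bit = 6, pads a to three digits, and sets the
+  single bit 0x40 in digit 2, i.e. a = 2^70.
+ */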
+
+/* }}} */
+
+/* {{{ s_mp_reduce(x, m, mu) */
+
+/*
+  Compute Barrett reduction, x (mod m), given a precomputed value for
+  mu = b^2k / m, where b = RADIX and k = #digits(m). This should be
+  faster than straight division, when many reductions by the same
+  value of m are required (such as in modular exponentiation). This
+  can nearly halve the time required to do modular exponentiation,
+  as compared to using the full integer divide to reduce.
+
+  This algorithm was derived from the _Handbook of Applied
+  Cryptography_ by Menezes, van Oorschot, and Vanstone, Ch. 14,
+  pp. 603-604.
+ */
+
+mp_err
+s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
+{
+ mp_int q;
+ mp_err res;
+
+ if ((res = mp_init_copy(&q, x)) != MP_OKAY)
+ return res;
+
+ s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1) */
+ s_mp_mul(&q, mu); /* q2 = q1 * mu */
+ s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
+
+ /* x = x mod b^(k+1), quick (no division) */
+ s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
+
+ /* q = q * m mod b^(k+1), quick (no division) */
+ s_mp_mul(&q, m);
+ s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
+
+ /* x = x - q */
+ if ((res = mp_sub(x, &q, x)) != MP_OKAY)
+ goto CLEANUP;
+
+ /* If x < 0, add b^(k+1) to it */
+ if (mp_cmp_z(x) < 0) {
+ mp_set(&q, 1);
+ if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = mp_add(x, &q, x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ /* Back off if it's too big */
+ while (mp_cmp(x, m) >= 0) {
+ if ((res = s_mp_sub(x, m)) != MP_OKAY)
+ break;
+ }
+
+CLEANUP:
+ mp_clear(&q);
+
+ return res;
+
+} /* end s_mp_reduce() */
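+
+/*
+  Worked example in radix b = 10 (illustrative): take m = 7, so k = 1
+  and mu = floor(b^2k / m) = floor(100/7) = 14.  For x = 53:
+  q1 = floor(53 / b^(k-1)) = 53, q2 = 53 * 14 = 742, and
+  q3 = floor(742 / b^(k+1)) = 7.  Then x mod b^(k+1) = 53 and
+  q3*m mod b^(k+1) = 49, so x - q = 4, which is 53 mod 7; no
+  correction step is needed here.
+ */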
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive comparisons */
+
+/* {{{ s_mp_cmp(a, b) */
+
+/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b */
+int
+s_mp_cmp(const mp_int *a, const mp_int *b)
+{
+ ARGMPCHK(a != NULL && b != NULL);
+
+ mp_size used_a = MP_USED(a);
+ {
+ mp_size used_b = MP_USED(b);
+
+ if (used_a > used_b)
+ goto IS_GT;
+ if (used_a < used_b)
+ goto IS_LT;
+ }
+ {
+ mp_digit *pa, *pb;
+ mp_digit da = 0, db = 0;
+
+#define CMP_AB(n) \
+ if ((da = pa[n]) != (db = pb[n])) \
+ goto done
+
+ pa = MP_DIGITS(a) + used_a;
+ pb = MP_DIGITS(b) + used_a;
+ while (used_a >= 4) {
+ pa -= 4;
+ pb -= 4;
+ used_a -= 4;
+ CMP_AB(3);
+ CMP_AB(2);
+ CMP_AB(1);
+ CMP_AB(0);
+ }
+ while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
+ /* do nothing */;
+ done:
+ if (da > db)
+ goto IS_GT;
+ if (da < db)
+ goto IS_LT;
+ }
+ return MP_EQ;
+IS_LT:
+ return MP_LT;
+IS_GT:
+ return MP_GT;
+} /* end s_mp_cmp() */
+
+/* }}} */
+
+/* {{{ s_mp_cmp_d(a, d) */
+
+/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d */
+int
+s_mp_cmp_d(const mp_int *a, mp_digit d)
+{
+ ARGMPCHK(a != NULL);
+
+ if (USED(a) > 1)
+ return MP_GT;
+
+ if (DIGIT(a, 0) < d)
+ return MP_LT;
+ else if (DIGIT(a, 0) > d)
+ return MP_GT;
+ else
+ return MP_EQ;
+
+} /* end s_mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2(v) */
+
+/*
+ Returns -1 if the value is not a power of two; otherwise, it returns
+ k such that v = 2^k, i.e. lg(v).
+ */
+int
+s_mp_ispow2(const mp_int *v)
+{
+ mp_digit d;
+ int extra = 0, ix;
+
+ ARGMPCHK(v != NULL);
+
+ ix = MP_USED(v) - 1;
+ d = MP_DIGIT(v, ix); /* most significant digit of v */
+
+ extra = s_mp_ispow2d(d);
+ if (extra < 0 || ix == 0)
+ return extra;
+
+ while (--ix >= 0) {
+ if (DIGIT(v, ix) != 0)
+ return -1; /* not a power of two */
+ extra += MP_DIGIT_BIT;
+ }
+
+ return extra;
+
+} /* end s_mp_ispow2() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2d(d) */
+
+int
+s_mp_ispow2d(mp_digit d)
+{
+ if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
+ int pow = 0;
+#if defined(MP_USE_UINT_DIGIT)
+ if (d & 0xffff0000U)
+ pow += 16;
+ if (d & 0xff00ff00U)
+ pow += 8;
+ if (d & 0xf0f0f0f0U)
+ pow += 4;
+ if (d & 0xccccccccU)
+ pow += 2;
+ if (d & 0xaaaaaaaaU)
+ pow += 1;
+#elif defined(MP_USE_LONG_LONG_DIGIT)
+ if (d & 0xffffffff00000000ULL)
+ pow += 32;
+ if (d & 0xffff0000ffff0000ULL)
+ pow += 16;
+ if (d & 0xff00ff00ff00ff00ULL)
+ pow += 8;
+ if (d & 0xf0f0f0f0f0f0f0f0ULL)
+ pow += 4;
+ if (d & 0xccccccccccccccccULL)
+ pow += 2;
+ if (d & 0xaaaaaaaaaaaaaaaaULL)
+ pow += 1;
+#elif defined(MP_USE_LONG_DIGIT)
+ if (d & 0xffffffff00000000UL)
+ pow += 32;
+ if (d & 0xffff0000ffff0000UL)
+ pow += 16;
+ if (d & 0xff00ff00ff00ff00UL)
+ pow += 8;
+ if (d & 0xf0f0f0f0f0f0f0f0UL)
+ pow += 4;
+ if (d & 0xccccccccccccccccUL)
+ pow += 2;
+ if (d & 0xaaaaaaaaaaaaaaaaUL)
+ pow += 1;
+#else
+#error "unknown type for mp_digit"
+#endif
+ return pow;
+ }
+ return -1;
+
+} /* end s_mp_ispow2d() */
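+
+/*
+  Example (illustrative, for a 32-bit mp_digit): d = 0x1000 passes the
+  power-of-two test, then the mask ladder accumulates pow = 8 (from
+  0xff00ff00) + 4 (from 0xf0f0f0f0) = 12, i.e. d = 2^12.  Each mask
+  selects the bit positions whose index has a particular bit set, so
+  summing the matching contributions reconstructs the bit index.
+ */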
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive I/O helpers */
+
+/* {{{ s_mp_tovalue(ch, r) */
+
+/*
+  Convert the given character to its digit value, in the given radix.
+  If the given character is not understood in the given radix, -1 is
+  returned. Otherwise the digit's numeric value is returned.
+
+  The results will be odd if you use a radix < 2 or > 62; you are
+  expected to know what you're up to.
+ */
+int
+s_mp_tovalue(char ch, int r)
+{
+ int val, xch;
+
+ if (r > 36)
+ xch = ch;
+ else
+ xch = toupper(ch);
+
+ if (isdigit(xch))
+ val = xch - '0';
+ else if (isupper(xch))
+ val = xch - 'A' + 10;
+ else if (islower(xch))
+ val = xch - 'a' + 36;
+ else if (xch == '+')
+ val = 62;
+ else if (xch == '/')
+ val = 63;
+ else
+ return -1;
+
+ if (val < 0 || val >= r)
+ return -1;
+
+ return val;
+
+} /* end s_mp_tovalue() */
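+
+/*
+  Example: in radix 16, 'b' is first upcased to 'B' and maps to 11;
+  in radix 62, case is preserved and 'b' maps to 'b' - 'a' + 36 = 37.
+ */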
+
+/* }}} */
+
+/* {{{ s_mp_todigit(val, r, low) */
+
+/*
+  Convert val to a radix-r digit, if possible. If val is out of range
+  for r, returns zero. Otherwise, returns an ASCII character denoting
+  the value in the given radix.
+
+  The results may be odd if you use a radix < 2 or > 64; you are
+  expected to know what you're doing.
+ */
+
+char
+s_mp_todigit(mp_digit val, int r, int low)
+{
+ char ch;
+
+ if (val >= r)
+ return 0;
+
+ ch = s_dmap_1[val];
+
+ if (r <= 36 && low)
+ ch = tolower(ch);
+
+ return ch;
+
+} /* end s_mp_todigit() */
+
+/* }}} */
+
+/* {{{ s_mp_outlen(bits, radix) */
+
+/*
+  Return an estimate of the string length needed to hold a radix-r
+  representation of a number with 'bits' significant bits, plus one
+  extra byte for a zero terminator (assuming C-style strings here).
+ */
+int
+s_mp_outlen(int bits, int r)
+{
+ return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
+
+} /* end s_mp_outlen() */
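+
+/*
+  Example (illustrative, taking LOG_V_2(10) to be about 0.30103): for a
+  512-bit number in radix 10 the estimate is
+  (int)(512 * 0.30103 + 1.5) + 1 = 156 bytes, enough for the 155
+  decimal digits of the largest such number plus the terminator.
+ */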
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_read_unsigned_octets(mp, str, len) */
+/* {{{ mp_read_unsigned_octets(mp, str, len) */
+/* mp_read_unsigned_octets(mp, str, len)
+   Read a raw value (base 256) into the given mp_int. There is no sign
+   bit; the number is taken to be positive. Leading zeros are ignored.
+ */
+
+mp_err
+mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
+{
+ int count;
+ mp_err res;
+ mp_digit d;
+
+ ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+ mp_zero(mp);
+
+ count = len % sizeof(mp_digit);
+ if (count) {
+ for (d = 0; count-- > 0; --len) {
+ d = (d << 8) | *str++;
+ }
+ MP_DIGIT(mp, 0) = d;
+ }
+
+ /* Read the rest of the digits */
+ for (; len > 0; len -= sizeof(mp_digit)) {
+ for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
+ d = (d << 8) | *str++;
+ }
+ if (MP_EQ == mp_cmp_z(mp)) {
+ if (!d)
+ continue;
+ } else {
+ if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
+ return res;
+ }
+ MP_DIGIT(mp, 0) = d;
+ }
+ return MP_OKAY;
+} /* end mp_read_unsigned_octets() */
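+
+/*
+  Example (illustrative, assuming a 4-byte mp_digit): reading the five
+  octets {0x01, 0x02, 0x03, 0x04, 0x05} first packs the single odd
+  leading byte (len % sizeof(mp_digit) = 1) into digit 0, then shifts
+  that up one digit and packs the remaining four bytes, leaving the
+  two-digit value 0x0102030405.
+ */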
+/* }}} */
+
+/* {{{ mp_unsigned_octet_size(mp) */
+unsigned int
+mp_unsigned_octet_size(const mp_int *mp)
+{
+ unsigned int bytes;
+ int ix;
+ mp_digit d = 0;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+ ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
+
+ bytes = (USED(mp) * sizeof(mp_digit));
+
+ /* subtract leading zeros. */
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ d = DIGIT(mp, ix);
+ if (d)
+ break;
+ bytes -= sizeof(d);
+ }
+ if (!bytes)
+ return 1;
+
+ /* Have MSD, check digit bytes, high order first */
+ for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
+ unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
+ if (x)
+ break;
+ --bytes;
+ }
+ return bytes;
+} /* end mp_unsigned_octet_size() */
+/* }}} */
+
+/* {{{ mp_to_unsigned_octets(mp, str) */
+/* Output a buffer of big-endian octets no longer than the specified maximum. */
+mp_err
+mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+ int ix, pos = 0;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+ bytes = mp_unsigned_octet_size(mp);
+ ARGCHK(bytes <= maxlen, MP_BADARG);
+
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+ int jx;
+
+ /* Unpack digit bytes, high order first */
+ for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+ unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+ if (!pos && !x) /* suppress leading zeros */
+ continue;
+ str[pos++] = x;
+ }
+ }
+ if (!pos)
+ str[pos++] = 0;
+ return pos;
+} /* end mp_to_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_to_signed_octets(mp, str) */
+/* Output a buffer of big-endian octets no longer than the specified maximum. */
+mp_err
+mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+ int ix, pos = 0;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+ bytes = mp_unsigned_octet_size(mp);
+ ARGCHK(bytes <= maxlen, MP_BADARG);
+
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+ int jx;
+
+ /* Unpack digit bytes, high order first */
+ for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+ unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+ if (!pos) {
+ if (!x) /* suppress leading zeros */
+ continue;
+ if (x & 0x80) { /* add one leading zero to make output positive. */
+ ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
+ if (bytes + 1 > maxlen)
+ return MP_BADARG;
+ str[pos++] = 0;
+ }
+ }
+ str[pos++] = x;
+ }
+ }
+ if (!pos)
+ str[pos++] = 0;
+ return pos;
+} /* end mp_to_signed_octets() */
+/* }}} */
+
+/* {{{ mp_to_fixlen_octets(mp, str) */
+/* Output a buffer of big-endian octets exactly as long as requested;
+   constant time in the value of mp. */
+mp_err
+mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
+{
+ int ix, jx;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);
+
+ /* Constant time on the value of mp. Don't use mp_unsigned_octet_size. */
+ bytes = USED(mp) * MP_DIGIT_SIZE;
+
+ /* If the output is shorter than the native size of mp, then check that any
+ * bytes not written have zero values. This check isn't constant time on
+ * the assumption that timing-sensitive callers can guarantee that mp fits
+ * in the allocated space. */
+ ix = USED(mp) - 1;
+ if (bytes > length) {
+ unsigned int zeros = bytes - length;
+
+ while (zeros >= MP_DIGIT_SIZE) {
+ ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
+ zeros -= MP_DIGIT_SIZE;
+ ix--;
+ }
+
+ if (zeros > 0) {
+ mp_digit d = DIGIT(mp, ix);
+ mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
+ ARGCHK((d & m) == 0, MP_BADARG);
+ for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
+ *str++ = d >> (jx * CHAR_BIT);
+ }
+ ix--;
+ }
+ } else if (bytes < length) {
+ /* Place any needed leading zeros. */
+ unsigned int zeros = length - bytes;
+ memset(str, 0, zeros);
+ str += zeros;
+ }
+
+ /* Iterate over each whole digit... */
+ for (; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+
+ /* Unpack digit bytes, high order first */
+ for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
+ *str++ = d >> (jx * CHAR_BIT);
+ }
+ }
+ return MP_OKAY;
+} /* end mp_to_fixlen_octets() */
+/* }}} */
+
+/* {{{ mp_cswap(condition, a, b, numdigits) */
+/* Performs a conditional, constant-time swap between two mp_int values. */
+mp_err
+mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
+{
+ mp_digit x;
+ unsigned int i;
+ mp_err res = 0;
+
+ /* if pointers are equal return */
+ if (a == b)
+ return res;
+
+ if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
+ MP_CHECKOK(s_mp_grow(a, numdigits));
+ MP_CHECKOK(s_mp_grow(b, numdigits));
+ }
+
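+    /* Map condition == 0 to an all-zero mask and any nonzero condition */
+    /* to an all-ones mask, without branching on the possibly secret value. */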
+ condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;
+
+ x = (USED(a) ^ USED(b)) & condition;
+ USED(a) ^= x;
+ USED(b) ^= x;
+
+ x = (SIGN(a) ^ SIGN(b)) & condition;
+ SIGN(a) ^= x;
+ SIGN(b) ^= x;
+
+ for (i = 0; i < numdigits; i++) {
+ x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
+ DIGIT(a, i) ^= x;
+ DIGIT(b, i) ^= x;
+ }
+
+CLEANUP:
+ return res;
+} /* end mp_cswap() */
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h
new file mode 100644
index 0000000000..dd129db0d6
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.h
@@ -0,0 +1,363 @@
+/*
+ * mpi.h
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPI_
+#define _H_MPI_
+
+#include "mpi-config.h"
+
+#include "seccomon.h"
+SEC_BEGIN_PROTOS
+
+#if MP_DEBUG
+#undef MP_IOFUNC
+#define MP_IOFUNC 1
+#endif
+
+#if MP_IOFUNC
+#include <stdio.h>
+#include <ctype.h>
+#endif
+
+#include <limits.h>
+
+#if defined(BSDI)
+#undef ULLONG_MAX
+#endif
+
+#include <sys/types.h>
+
+#define MP_NEG 1
+#define MP_ZPOS 0
+
+#define MP_OKAY 0 /* no error, all is well */
+#define MP_YES 0 /* yes (boolean result) */
+#define MP_NO -1 /* no (boolean result) */
+#define MP_MEM -2 /* out of memory */
+#define MP_RANGE -3 /* argument out of range */
+#define MP_BADARG -4 /* invalid parameter */
+#define MP_UNDEF -5 /* answer is undefined */
+#define MP_LAST_CODE MP_UNDEF
+
+typedef unsigned int mp_sign;
+typedef unsigned int mp_size;
+typedef int mp_err;
+
+#define MP_32BIT_MAX 4294967295U
+
+#if !defined(ULONG_MAX)
+#error "ULONG_MAX not defined"
+#elif !defined(UINT_MAX)
+#error "UINT_MAX not defined"
+#elif !defined(USHRT_MAX)
+#error "USHRT_MAX not defined"
+#endif
+
+#if defined(ULLONG_MAX) /* C99, Solaris */
+#define MP_ULONG_LONG_MAX ULLONG_MAX
+/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
+#elif defined(ULONG_LONG_MAX) /* HPUX */
+#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
+#elif defined(ULONGLONG_MAX) /* AIX */
+#define MP_ULONG_LONG_MAX ULONGLONG_MAX
+#endif
+
+/* We use unsigned long for mp_digit only if long is more than 32 bits. */
+#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
+typedef unsigned long mp_digit;
+#define MP_DIGIT_MAX ULONG_MAX
+#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1
+#undef MP_USE_LONG_DIGIT
+#define MP_USE_LONG_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+
+#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
+typedef unsigned long long mp_digit;
+#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
+#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1
+#undef MP_USE_LONG_LONG_DIGIT
+#define MP_USE_LONG_LONG_DIGIT 1
+#undef MP_USE_LONG_DIGIT
+
+#else
+typedef unsigned int mp_digit;
+#define MP_DIGIT_MAX UINT_MAX
+#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX USHRT_MAX
+#undef MP_USE_UINT_DIGIT
+#define MP_USE_UINT_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+#undef MP_USE_LONG_DIGIT
+#endif
+
+#if !defined(MP_NO_MP_WORD)
+#if defined(MP_USE_UINT_DIGIT) && \
+ (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
+
+#if (ULONG_MAX > UINT_MAX)
+typedef unsigned long mp_word;
+typedef long mp_sword;
+#define MP_WORD_MAX ULONG_MAX
+
+#else
+typedef unsigned long long mp_word;
+typedef long long mp_sword;
+#define MP_WORD_MAX MP_ULONG_LONG_MAX
+#endif
+
+#else
+#define MP_NO_MP_WORD 1
+#endif
+#endif /* !defined(MP_NO_MP_WORD) */
+
+#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
+typedef unsigned int mp_word;
+typedef int mp_sword;
+#define MP_WORD_MAX UINT_MAX
+#endif
+
+#define MP_DIGIT_SIZE sizeof(mp_digit)
+#define MP_DIGIT_BIT (CHAR_BIT * MP_DIGIT_SIZE)
+#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word))
+#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX)
+
+#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2)
+#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX)
+/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
+** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
+** consistent with the other _HALF_ names.
+*/
+
+/* Macros for accessing the mp_int internals */
+#define MP_SIGN(MP) ((MP)->sign)
+#define MP_USED(MP) ((MP)->used)
+#define MP_ALLOC(MP) ((MP)->alloc)
+#define MP_DIGITS(MP) ((MP)->dp)
+#define MP_DIGIT(MP, N) (MP)->dp[(N)]
+
+/* This defines the maximum I/O base (minimum is 2) */
+#define MP_MAX_RADIX 64
+
+/* Constant Time Macros on mp_digits */
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
+#define MP_CT_TRUE ((mp_digit)1)
+#define MP_CT_FALSE ((mp_digit)0)
+
+/* basic zero and non zero tests */
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+
+/* Basic constant-time helper macros for equalities and inequalities.
+ * The inequalities will produce incorrect results if
+ * abs(a-b) >= MP_DIGIT_MAX/2. This can be avoided if unsigned values stay
+ * within the range 0-MP_DIGIT_MAX/2. */
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b)))
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b)))
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
+#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b))
+#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b))
+
+/* use constant time result to select a boolean value
+ * or an mp digit depending on the args */
+#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l))))
+#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r) /* mask, l and r are booleans */
+#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r) /*mask, l, and r are mp_digit */
+
+/* full inequalities that work with full mp_digit values */
+#define MP_CT_OVERFLOW(a, b, c, d) \
+ MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)), \
+ (MP_CT_HIGH_TO_LOW(d)), c)
+#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b)
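+
+/*
+  Example (illustrative): MP_CT_SELB(MP_CT_LTU(a, b), x, y) evaluates
+  to the boolean x when a < b and to y otherwise, for arbitrary
+  full-range mp_digit values of a and b, with no data-dependent branch.
+ */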
+
+typedef struct {
+ mp_sign sign; /* sign of this quantity */
+ mp_size alloc; /* how many digits allocated */
+ mp_size used; /* how many digits used */
+ mp_digit *dp; /* the digits themselves */
+} mp_int;
+
+/* Default precision */
+mp_size mp_get_prec(void);
+void mp_set_prec(mp_size prec);
+
+/* Memory management */
+mp_err mp_init(mp_int *mp);
+mp_err mp_init_size(mp_int *mp, mp_size prec);
+mp_err mp_init_copy(mp_int *mp, const mp_int *from);
+mp_err mp_copy(const mp_int *from, mp_int *to);
+void mp_exch(mp_int *mp1, mp_int *mp2);
+void mp_clear(mp_int *mp);
+void mp_zero(mp_int *mp);
+void mp_set(mp_int *mp, mp_digit d);
+mp_err mp_set_int(mp_int *mp, long z);
+#define mp_set_long(mp, z) mp_set_int(mp, z)
+mp_err mp_set_ulong(mp_int *mp, unsigned long z);
+
+/* Single digit arithmetic */
+mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_2(const mp_int *a, mp_int *c);
+mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
+mp_err mp_div_2(const mp_int *a, mp_int *c);
+mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
+
+/* Sign manipulations */
+mp_err mp_abs(const mp_int *a, mp_int *b);
+mp_err mp_neg(const mp_int *a, mp_int *b);
+
+/* Full arithmetic */
+mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
+mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
+#if MP_SQUARE
+mp_err mp_sqr(const mp_int *a, mp_int *b);
+#else
+#define mp_sqr(a, b) mp_mul(a, a, b)
+#endif
+mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
+mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
+mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_2expt(mp_int *a, mp_digit k);
+
+/* Modular arithmetic */
+#if MP_MODARITH
+mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
+mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+#if MP_SQUARE
+mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
+#else
+#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
+#endif
+mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
+#endif /* MP_MODARITH */
+
+/* montgomery math */
+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
+mp_digit mp_calculate_mont_n0i(const mp_int *N);
+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
+
+/* Comparisons */
+int mp_cmp_z(const mp_int *a);
+int mp_cmp_d(const mp_int *a, mp_digit d);
+int mp_cmp(const mp_int *a, const mp_int *b);
+int mp_cmp_mag(const mp_int *a, const mp_int *b);
+int mp_isodd(const mp_int *a);
+int mp_iseven(const mp_int *a);
+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);
+
+/* Number theoretic */
+mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
+mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
+
+/* Input and output */
+#if MP_IOFUNC
+void mp_print(mp_int *mp, FILE *ofp);
+#endif /* end MP_IOFUNC */
+
+/* Base conversion */
+mp_err mp_read_raw(mp_int *mp, char *str, int len);
+int mp_raw_size(mp_int *mp);
+mp_err mp_toraw(mp_int *mp, char *str);
+mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
+mp_err mp_read_variable_radix(mp_int *a, const char *str, int default_radix);
+int mp_radix_size(mp_int *mp, int radix);
+mp_err mp_toradix(mp_int *mp, char *str, int radix);
+int mp_tovalue(char ch, int r);
+
+#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
+#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
+#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
+#define mp_tohex(M, S) mp_toradix((M), (S), 16)
+
+/* Error strings */
+const char *mp_strerror(mp_err ec);
+
+/* Octet string conversion functions */
+mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
+unsigned int mp_unsigned_octet_size(const mp_int *mp);
+mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
+
+/* Miscellaneous */
+mp_size mp_trailing_zeros(const mp_int *mp);
+void freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx);
+mp_err mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits);
+
+#define MP_CHECKOK(x) \
+ if (MP_OKAY > (res = (x))) \
+ goto CLEANUP
+#define MP_CHECKERR(x) \
+ if (MP_OKAY > (res = (x))) \
+ goto CLEANUP
+
+#define NEG MP_NEG
+#define ZPOS MP_ZPOS
+#define DIGIT_MAX MP_DIGIT_MAX
+#define DIGIT_BIT MP_DIGIT_BIT
+#define DIGIT_FMT MP_DIGIT_FMT
+#define RADIX MP_RADIX
+#define MAX_RADIX MP_MAX_RADIX
+#define SIGN(MP) MP_SIGN(MP)
+#define USED(MP) MP_USED(MP)
+#define ALLOC(MP) MP_ALLOC(MP)
+#define DIGITS(MP) MP_DIGITS(MP)
+#define DIGIT(MP, N) MP_DIGIT(MP, N)
+
+/* Functions which return an mp_err value will NULL-check their arguments via
+ * ARGCHK(condition, return), where the caller is responsible for checking the
+ * mp_err return code. For functions that return an integer type, the caller
+ * has no way to tell if the value is an error code or a legitimate value.
+ * Therefore, ARGMPCHK(condition) will trigger an assertion failure on debug
+ * builds, but no-op in optimized builds. */
+#if MP_ARGCHK == 1
+#define ARGMPCHK(X) /* */
+#define ARGCHK(X, Y) \
+ { \
+ if (!(X)) { \
+ return (Y); \
+ } \
+ }
+#elif MP_ARGCHK == 2
+#include <assert.h>
+#define ARGMPCHK(X) assert(X)
+#define ARGCHK(X, Y) assert(X)
+#else
+#define ARGMPCHK(X) /* */
+#define ARGCHK(X, Y) /* */
+#endif
+
+#ifdef CT_VERIF
+void mp_taint(mp_int *mp);
+void mp_untaint(mp_int *mp);
+#endif
+
+SEC_END_PROTOS
+
+#endif /* end _H_MPI_ */
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c b/security/nss/lib/freebl/mpi/mpi_amd64.c
new file mode 100644
index 0000000000..9e538bb6a1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64.c
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_AMD64
+#error This file only works on AMD64 platforms.
+#endif
+
+#include <mpi-priv.h>
+
+/*
+ * MPI glue
+ *
+ */
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void MPI_ASM_DECL
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ mp_digit w;
+ mp_digit d;
+
+ d = s_mpv_mul_add_vec64(c, a, a_len, b);
+ c += a_len;
+ while (d) {
+ w = c[0] + d;
+ d = (w < c[0] || w < d);
+ *c++ = w;
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_common.S b/security/nss/lib/freebl/mpi/mpi_amd64_common.S
new file mode 100644
index 0000000000..4000f2066a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_common.S
@@ -0,0 +1,409 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+# ------------------------------------------------------------------------
+#
+# Implementation of s_mpv_mul_set_vec which exploits
+# the 64X64->128 bit unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r = a * digit, r and a are vectors of length len
+# returns the carry digit
+# r and a are 64 bit aligned.
+#
+# uint64_t
+# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+#
+
+.text; .align 16; .globl s_mpv_mul_set_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64
+.private_extern s_mpv_mul_set_vec64
+s_mpv_mul_set_vec64:
+#else
+.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+#endif
+
+ xorq %rax, %rax # if (len == 0) return (0)
+ testq %rdx, %rdx
+ jz .L17
+
+ movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
+ xorq %r9, %r9 # cy = 0
+
+.L15:
+ cmpq $8, %r8 # 8 - len
+ jb .L16
+ movq 0(%rsi), %rax # rax = a[0]
+ movq 8(%rsi), %r11 # prefetch a[1]
+ mulq %rcx # p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 # prefetch a[2]
+ mulq %rcx # p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 # prefetch a[3]
+ mulq %rcx # p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 # prefetch a[4]
+ mulq %rcx # p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 # prefetch a[5]
+ mulq %rcx # p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 # prefetch a[6]
+ mulq %rcx # p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 # prefetch a[7]
+ mulq %rcx # p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx # p = a[7] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 56(%rdi) # r[7] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L17
+ jmp .L15
+
+.L16:
+ movq 0(%rsi), %rax
+ mulq %rcx # p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 8(%rsi), %rax
+ mulq %rcx # p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 16(%rsi), %rax
+ mulq %rcx # p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 24(%rsi), %rax
+ mulq %rcx # p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 32(%rsi), %rax
+ mulq %rcx # p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 40(%rsi), %rax
+ mulq %rcx # p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 48(%rsi), %rax
+ mulq %rcx # p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+
+.L17:
+ movq %r9, %rax
+ ret
+
+#ifndef DARWIN
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+#endif
+
+# ------------------------------------------------------------------------
+#
+# Implementation of s_mpv_mul_add_vec which exploits
+# the 64X64->128 bit unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r += a * digit, r and a are vectors of length len
+# returns the carry digit
+# r and a are 64 bit aligned.
+#
+# uint64_t
+# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+#
+
+.text; .align 16; .globl s_mpv_mul_add_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64
+.private_extern s_mpv_mul_add_vec64
+s_mpv_mul_add_vec64:
+#else
+.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+#endif
+
+ xorq %rax, %rax # if (len == 0) return (0)
+ testq %rdx, %rdx
+ jz .L27
+
+ movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
+ xorq %r9, %r9 # cy = 0
+
+.L25:
+ cmpq $8, %r8 # 8 - len
+ jb .L26
+ movq 0(%rsi), %rax # rax = a[0]
+ movq 0(%rdi), %r10 # r10 = r[0]
+ movq 8(%rsi), %r11 # prefetch a[1]
+ mulq %rcx # p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[0]
+ movq 8(%rdi), %r10 # prefetch r[1]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 # prefetch a[2]
+ mulq %rcx # p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[1]
+ movq 16(%rdi), %r10 # prefetch r[2]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 # prefetch a[3]
+ mulq %rcx # p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[2]
+ movq 24(%rdi), %r10 # prefetch r[3]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 # prefetch a[4]
+ mulq %rcx # p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[3]
+ movq 32(%rdi), %r10 # prefetch r[4]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 # prefetch a[5]
+ mulq %rcx # p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[4]
+ movq 40(%rdi), %r10 # prefetch r[5]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 # prefetch a[6]
+ mulq %rcx # p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[5]
+ movq 48(%rdi), %r10 # prefetch r[6]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 # prefetch a[7]
+ mulq %rcx # p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[6]
+ movq 56(%rdi), %r10 # prefetch r[7]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx # p = a[7] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[7]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 56(%rdi) # r[7] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L27
+ jmp .L25
+
+.L26:
+ movq 0(%rsi), %rax
+ movq 0(%rdi), %r10
+ mulq %rcx # p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[0]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 8(%rsi), %rax
+ movq 8(%rdi), %r10
+ mulq %rcx # p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[1]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 16(%rsi), %rax
+ movq 16(%rdi), %r10
+ mulq %rcx # p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[2]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 24(%rsi), %rax
+ movq 24(%rdi), %r10
+ mulq %rcx # p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[3]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 32(%rsi), %rax
+ movq 32(%rdi), %r10
+ mulq %rcx # p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[4]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 40(%rsi), %rax
+ movq 40(%rdi), %r10
+ mulq %rcx # p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[5]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 48(%rsi), %rax
+ movq 48(%rdi), %r10
+ mulq %rcx # p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[6]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+
+.L27:
+ movq %r9, %rax
+ ret
+
+#ifndef DARWIN
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
new file mode 100644
index 0000000000..2120c18f9d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
@@ -0,0 +1,388 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;
+; This code was converted from mpi_amd64_gas.asm to MASM for x64.
+;
+
+; ------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_set_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+; ------------------------------------------------------------------------
+
+; r = a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+.CODE
+
+s_mpv_mul_set_vec64 PROC
+
+	; Parameter-register compatibility:
+	;
+	; The Microsoft x64 ABI used by MASM passes the first four integer
+	; arguments in rcx, rdx, r8, r9, while the GAS version uses the
+	; System V registers rdi, rsi, rdx, rcx; remap them here. rdi and
+	; rsi are non-volatile in the Microsoft ABI, so they are saved first.
+
+ push rdi
+ push rsi
+
+	mov	rdi, rcx	; rdi = r
+	mov	rsi, rdx	; rsi = a
+	mov	edx, r8d	; edx = len
+	mov	rcx, r9		; rcx = digit
+
+ xor rax, rax
+ test rdx, rdx
+ jz L17
+ mov r8, rdx
+ xor r9, r9
+
+L15:
+ cmp r8, 8
+ jb L16
+ mov rax, [rsi]
+ mov r11, [8+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [0+rdi], rax
+ mov r9, rdx
+ mov rax,r11
+ mov r11, [16+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [24+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [16+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [32+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [24+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [40+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [32+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [48+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [40+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [56+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [48+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [56+rdi],rax
+ mov r9,rdx
+ add rsi, 64
+ add rdi, 64
+ sub r8, 8
+ jz L17
+ jmp L15
+
+L16:
+ mov rax, [0+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx,0
+ mov [0+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L17
+ mov rax, [8+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [16+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [16+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L17
+ mov rax, [24+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [24+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [32+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [32+rdi],rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [40+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [40+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [48+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [48+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+
+L17:
+ mov rax, r9
+ pop rsi
+ pop rdi
+ ret
+
+s_mpv_mul_set_vec64 ENDP
+
+
+;------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_add_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+;------------------------------------------------------------------------
+
+; r += a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+s_mpv_mul_add_vec64 PROC
+
+	; Parameter-register compatibility:
+	;
+	; The Microsoft x64 ABI used by MASM passes the first four integer
+	; arguments in rcx, rdx, r8, r9, while the GAS version uses the
+	; System V registers rdi, rsi, rdx, rcx; remap them here. rdi and
+	; rsi are non-volatile in the Microsoft ABI, so they are saved first.
+
+ push rdi
+ push rsi
+
+	mov	rdi, rcx	; rdi = r
+	mov	rsi, rdx	; rsi = a
+	mov	edx, r8d	; edx = len
+	mov	rcx, r9		; rcx = digit
+
+ xor rax, rax
+ test rdx, rdx
+ jz L27
+ mov r8, rdx
+ xor r9, r9
+
+L25:
+ cmp r8, 8
+ jb L26
+ mov rax, [0+rsi]
+ mov r10, [0+rdi]
+ mov r11, [8+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [8+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [0+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [16+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [16+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [24+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [24+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [16+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [32+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [32+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [24+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [40+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [40+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [32+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [48+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [48+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [40+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [56+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [56+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [48+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [56+rdi],rax
+ mov r9,rdx
+ add rsi,64
+ add rdi,64
+ sub r8, 8
+ jz L27
+ jmp L25
+
+L26:
+ mov rax, [0+rsi]
+ mov r10, [0+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [0+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [8+rsi]
+ mov r10, [8+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [16+rsi]
+ mov r10, [16+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [16+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [24+rsi]
+ mov r10, [24+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [24+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [32+rsi]
+ mov r10, [32+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [32+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [40+rsi]
+ mov r10, [40+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [40+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [48+rsi]
+ mov r10, [48+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax, r9
+ adc rdx, 0
+ mov [48+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L27
+
+L27:
+ mov rax, r9
+
+ pop rsi
+ pop rdi
+ ret
+
+s_mpv_mul_add_vec64 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
new file mode 100644
index 0000000000..ddd5c40fda
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
@@ -0,0 +1,385 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/ ------------------------------------------------------------------------
+/
+/ Implementation of s_mpv_mul_set_vec which exploits
+/ the 64X64->128 bit unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r = a * digit, r and a are vectors of length len
+/ returns the carry digit
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
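+/
+/ For reference, a portable C sketch of the loop unrolled below (a sketch
+/ only, assuming a compiler that provides unsigned __int128; it is not
+/ part of the build):
+/
+/   uint64_t cy = 0;
+/   for (int i = 0; i < len; i++) {
+/       unsigned __int128 p = (unsigned __int128)a[i] * digit + cy;
+/       r[i] = (uint64_t)p;           /* lo(p) */
+/       cy = (uint64_t)(p >> 64);     /* hi(p) */
+/   }
+/   return cy;
+/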
+
+.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+
+ xorq %rax, %rax / if (len == 0) return (0)
+ testq %rdx, %rdx
+ jz .L17
+
+ movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
+ xorq %r9, %r9 / cy = 0
+
+.L15:
+ cmpq $8, %r8 / 8 - len
+ jb .L16
+ movq 0(%rsi), %rax / rax = a[0]
+ movq 8(%rsi), %r11 / prefetch a[1]
+ mulq %rcx / p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 / prefetch a[2]
+ mulq %rcx / p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 / prefetch a[3]
+ mulq %rcx / p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 / prefetch a[4]
+ mulq %rcx / p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 / prefetch a[5]
+ mulq %rcx / p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 / prefetch a[6]
+ mulq %rcx / p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 / prefetch a[7]
+ mulq %rcx / p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx / p = a[7] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 56(%rdi) / r[7] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L17
+ jmp .L15
+
+.L16:
+ movq 0(%rsi), %rax
+ mulq %rcx / p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 8(%rsi), %rax
+ mulq %rcx / p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 16(%rsi), %rax
+ mulq %rcx / p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 24(%rsi), %rax
+ mulq %rcx / p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 32(%rsi), %rax
+ mulq %rcx / p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 40(%rsi), %rax
+ mulq %rcx / p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 48(%rsi), %rax
+ mulq %rcx / p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+
+.L17:
+ movq %r9, %rax
+ ret
+
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+
+/ ------------------------------------------------------------------------
+/
+/ Implementation of s_mpv_mul_add_vec which exploits
+/ the 64X64->128 bit unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r += a * digit, r and a are vectors of length len
+/ returns the carry digit
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+
+.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+
+ xorq %rax, %rax / if (len == 0) return (0)
+ testq %rdx, %rdx
+ jz .L27
+
+ movq %rdx, %r8 / Use r8 for len; %rdx is used by mul
+ xorq %r9, %r9 / cy = 0
+
+.L25:
+ cmpq $8, %r8 / 8 - len
+ jb .L26
+ movq 0(%rsi), %rax / rax = a[0]
+ movq 0(%rdi), %r10 / r10 = r[0]
+ movq 8(%rsi), %r11 / prefetch a[1]
+ mulq %rcx / p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[0]
+ movq 8(%rdi), %r10 / prefetch r[1]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 / prefetch a[2]
+ mulq %rcx / p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[1]
+ movq 16(%rdi), %r10 / prefetch r[2]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 / prefetch a[3]
+ mulq %rcx / p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[2]
+ movq 24(%rdi), %r10 / prefetch r[3]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 / prefetch a[4]
+ mulq %rcx / p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[3]
+ movq 32(%rdi), %r10 / prefetch r[4]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 / prefetch a[5]
+ mulq %rcx / p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[4]
+ movq 40(%rdi), %r10 / prefetch r[5]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 / prefetch a[6]
+ mulq %rcx / p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[5]
+ movq 48(%rdi), %r10 / prefetch r[6]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 / prefetch a[7]
+ mulq %rcx / p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[6]
+ movq 56(%rdi), %r10 / prefetch r[7]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx / p = a[7] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[7]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 56(%rdi) / r[7] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L27
+ jmp .L25
+
+.L26:
+ movq 0(%rsi), %rax
+ movq 0(%rdi), %r10
+ mulq %rcx / p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[0]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 8(%rsi), %rax
+ movq 8(%rdi), %r10
+ mulq %rcx / p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[1]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 16(%rsi), %rax
+ movq 16(%rdi), %r10
+ mulq %rcx / p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[2]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 24(%rsi), %rax
+ movq 24(%rdi), %r10
+ mulq %rcx / p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[3]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 32(%rsi), %rax
+ movq 32(%rdi), %r10
+ mulq %rcx / p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[4]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 40(%rsi), %rax
+ movq 40(%rdi), %r10
+ mulq %rcx / p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[5]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 48(%rsi), %rax
+ movq 48(%rdi), %r10
+ mulq %rcx / p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[6]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+
+.L27:
+ movq %r9, %rax
+ ret
+
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c
new file mode 100644
index 0000000000..27e4efdad1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_arm.c
@@ -0,0 +1,175 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This inline-assembly version is for 32-bit ARM platforms only */
+
+#if !defined(__arm__)
+#error "This is for ARM only"
+#endif
+
+/* The inline assembly below does not build as 16-bit Thumb (Thumb-1) code */
+#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__)
+
+#include "mpi-priv.h"
+
+#ifdef MP_ASSEMBLY_MULTIPLY
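+/*
+ * For reference, a portable sketch of the loop s_mpv_mul_d implements
+ * below (a sketch only, assuming 32-bit mp_digit, 64-bit mp_word, and
+ * the ACCUM()/CARRYOUT() low/high-half macros used elsewhere in mpi):
+ *
+ *     mp_digit cy = 0;
+ *     while (a_len--) {
+ *         mp_word w = (mp_word)b * *a++ + cy;  // one umlal per digit
+ *         *c++ = ACCUM(w);                     // low 32 bits
+ *         cy = CARRYOUT(w);                    // high 32 bits
+ *     }
+ *     *c = cy;
+ */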
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm__ __volatile__(
+ "mov r5, #0\n"
+#ifdef __thumb2__
+ "cbz %1, 2f\n"
+#else
+ "cmp %1, r5\n" /* r5 is 0 now */
+ "beq 2f\n"
+#endif
+
+ "1:\n"
+ "mov r4, #0\n"
+ "ldr r6, [%0], #4\n"
+        "umlal r5, r4, r6, %3\n" /* r4:r5 += a_i * b */
+ "str r5, [%2], #4\n"
+ "mov r5, r4\n"
+
+ "subs %1, #1\n"
+ "bne 1b\n"
+
+ "2:\n"
+ "str r5, [%2]\n"
+ : "+r"(a), "+l"(a_len), "+r"(c)
+ : "r"(b)
+ : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm__ __volatile__(
+ "mov r5, #0\n"
+#ifdef __thumb2__
+ "cbz %1, 2f\n"
+#else
+ "cmp %1, r5\n" /* r5 is 0 now */
+ "beq 2f\n"
+#endif
+
+ "1:\n"
+ "mov r4, #0\n"
+ "ldr r6, [%2]\n"
+ "adds r5, r6\n"
+ "adc r4, r4, #0\n"
+
+ "ldr r6, [%0], #4\n"
+ "umlal r5, r4, r6, %3\n"
+ "str r5, [%2], #4\n"
+ "mov r5, r4\n"
+
+ "subs %1, #1\n"
+ "bne 1b\n"
+
+ "2:\n"
+ "str r5, [%2]\n"
+ : "+r"(a), "+l"(a_len), "+r"(c)
+ : "r"(b)
+ : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ if (!a_len)
+ return;
+
+ __asm__ __volatile__(
+ "mov r5, #0\n"
+
+ "1:\n"
+ "mov r4, #0\n"
+ "ldr r6, [%2]\n"
+ "adds r5, r6\n"
+ "adc r4, r4, #0\n"
+ "ldr r6, [%0], #4\n"
+ "umlal r5, r4, r6, %3\n"
+ "str r5, [%2], #4\n"
+ "mov r5, r4\n"
+
+ "subs %1, #1\n"
+ "bne 1b\n"
+
+#ifdef __thumb2__
+ "cbz r4, 3f\n"
+#else
+ "cmp r4, #0\n"
+ "beq 3f\n"
+#endif
+
+ "2:\n"
+ "mov r4, #0\n"
+ "ldr r6, [%2]\n"
+ "adds r5, r6\n"
+ "adc r4, r4, #0\n"
+ "str r5, [%2], #4\n"
+ "movs r5, r4\n"
+ "bne 2b\n"
+
+ "3:\n"
+ : "+r"(a), "+l"(a_len), "+r"(c)
+ : "r"(b)
+ : "memory", "cc", "%r4", "%r5", "%r6");
+}
+#endif
+
+#ifdef MP_ASSEMBLY_SQUARE
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+ if (!a_len)
+ return;
+
+ __asm__ __volatile__(
+ "mov r3, #0\n"
+
+ "1:\n"
+ "mov r4, #0\n"
+ "ldr r6, [%0], #4\n"
+ "ldr r5, [%2]\n"
+ "adds r3, r5\n"
+ "adc r4, r4, #0\n"
+ "umlal r3, r4, r6, r6\n" /* w = r3:r4 */
+ "str r3, [%2], #4\n"
+
+ "ldr r5, [%2]\n"
+ "adds r3, r4, r5\n"
+ "mov r4, #0\n"
+ "adc r4, r4, #0\n"
+ "str r3, [%2], #4\n"
+ "mov r3, r4\n"
+
+ "subs %1, #1\n"
+ "bne 1b\n"
+
+#ifdef __thumb2__
+ "cbz r3, 3f\n"
+#else
+ "cmp r3, #0\n"
+ "beq 3f\n"
+#endif
+
+ "2:\n"
+ "mov r4, #0\n"
+ "ldr r5, [%2]\n"
+ "adds r3, r5\n"
+ "adc r4, r4, #0\n"
+ "str r3, [%2], #4\n"
+ "movs r3, r4\n"
+ "bne 2b\n"
+
+ "3:"
+ : "+r"(pa), "+r"(a_len), "+r"(ps)
+ :
+ : "memory", "cc", "%r3", "%r4", "%r5", "%r6");
+}
+#endif
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c
new file mode 100644
index 0000000000..0cea7685d6
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_hp.c
@@ -0,0 +1,81 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file contains routines that perform vector multiplication. */
+
+#include "mpi-priv.h"
+#include <unistd.h>
+
+#include <stddef.h>
+/* #include <sys/systeminfo.h> */
+#include <strings.h>
+#include <string.h> /* memcpy, memset */
+#include <stdlib.h> /* malloc, free */
+
+extern void multacc512(
+ int length, /* doublewords in multiplicand vector. */
+ const mp_digit *scalaraddr, /* Address of scalar. */
+ const mp_digit *multiplicand, /* The multiplicand vector. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+extern void maxpy_little(
+ int length, /* doublewords in multiplicand vector. */
+ const mp_digit *scalaraddr, /* Address of scalar. */
+ const mp_digit *multiplicand, /* The multiplicand vector. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+extern void add_diag_little(
+ int length, /* doublewords in input vector. */
+ const mp_digit *root, /* The vector to square. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+ add_diag_little(a_len, pa, ps);
+}
+
+#define MAX_STACK_DIGITS 258
+#define MULTACC512_LEN (512 / MP_DIGIT_BIT)
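+/*
+ * HP_MPY_ADD_FN dispatches to the fixed-size multacc512 routine when the
+ * multiplicand is exactly 512 bits (512 / MP_DIGIT_BIT digits) long, and
+ * to the general maxpy_little routine otherwise.
+ */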
+#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little)
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit x[MAX_STACK_DIGITS];
+ mp_digit *px = x;
+ size_t xSize = 0;
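+    /*
+     * The assembly routine accumulates into c after c has been zeroed,
+     * so when a aliases c we must multiply from a private copy of a.
+     */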
+
+ if (a == c) {
+ if (a_len > MAX_STACK_DIGITS) {
+ xSize = sizeof(mp_digit) * (a_len + 2);
+ px = malloc(xSize);
+ if (!px)
+ return;
+ }
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ }
+ s_mp_setz(c, a_len + 1);
+ HP_MPY_ADD_FN(a_len, &b, a, c);
+ if (px != x && px) {
+ memset(px, 0, xSize);
+ free(px);
+ }
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ c[a_len] = 0; /* so carry propagation stops here. */
+ HP_MPY_ADD_FN(a_len, &b, a, c);
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c)
+{
+ HP_MPY_ADD_FN(a_len, &b, a, c);
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s
new file mode 100644
index 0000000000..f800396596
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s
@@ -0,0 +1,313 @@
+/
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.text
+
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28:
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d
+.type s_mpv_mul_d,@function
+s_mpv_mul_d:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je L2 / jmp if a_len == 0
+ mov 8(%ebp),%esi / esi = a
+ cld
+L1:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = ax; edi += 4;
+ dec %ecx / --a_len
+ jnz L1 / jmp if a_len != 0
+L2:
+ mov %ebx,0(%edi) / *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28:
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d_add
+.type s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je L4 / jmp if a_len == 0
+ mov 8(%ebp),%esi / esi = a
+ cld
+L3:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx / add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = ax; edi += 4;
+ dec %ecx / --a_len
+ jnz L3 / jmp if a_len != 0
+L4:
+ mov %ebx,0(%edi) / *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28:
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.type s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je L6 / jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi / esi = a
+L5:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx / add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = ax; edi += 4;
+ dec %ecx / --a_len
+ jnz L5 / jmp if a_len != 0
+L6:
+ cmp $0,%ebx / is carry zero?
+ jz L8
+ mov 0(%edi),%eax / add in current word from *c
+ add %ebx,%eax
+ stosl / [es:edi] = ax; edi += 4;
+ jnc L8
+L7:
+ mov 0(%edi),%eax / add in current word from *c
+ adc $0,%eax
+ stosl / [es:edi] = ax; edi += 4;
+ jc L7
+L8:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / ebp - 20: caller's esi
+ / ebp - 16: caller's edi
+ / ebp - 12:
+ / ebp - 8: carry
+ / ebp - 4: a_len local
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: pa argument
+ / ebp + 12: a_len argument
+ / ebp + 16: ps argument
+ / ebp + 20:
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+
+.globl s_mpv_sqr_add_prop
+.type s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / a_len
+ mov 16(%ebp),%edi / edi = ps
+ cmp $0,%ecx
+ je L11 / jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi / esi = pa
+L10:
+ lodsl / %eax = [ds:si]; si += 4;
+ mull %eax
+
+ add %ebx,%eax / add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax / add low word from result
+ mov 4(%edi),%ebx
+ stosl / [es:di] = %eax; di += 4;
+ adc %ebx,%edx / add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx
+ stosl / [es:di] = %eax; di += 4;
+ dec %ecx / --a_len
+ jnz L10 / jmp if a_len != 0
+L11:
+ cmp $0,%ebx / is carry zero?
+ jz L14
+ mov 0(%edi),%eax / add in current word from *c
+ add %ebx,%eax
+ stosl / [es:edi] = ax; edi += 4;
+ jnc L14
+L12:
+ mov 0(%edi),%eax / add in current word from *c
+ adc $0,%eax
+ stosl / [es:edi] = ax; edi += 4;
+ jc L12
+L14:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ /
+ / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ / so its high bit is 1. This code is from NSPR.
+ /
+ / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ / mp_digit *qp, mp_digit *rp)
+
+ / esp + 0: Caller's ebx
+ / esp + 4: return address
+ / esp + 8: Nhi argument
+ / esp + 12: Nlo argument
+ / esp + 16: divisor argument
+ / esp + 20: qp argument
+ / esp + 24: rp argument
+ / registers:
+	/   eax: Nlo, then the quotient
+	/   ebx: the divisor, then qp, then rp
+	/   edx: Nhi, then the remainder
+ /
+
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+ push %ebx
+	mov	8(%esp),%edx	/ %edx = Nhi
+	mov	12(%esp),%eax	/ %eax = Nlo
+	mov	16(%esp),%ebx	/ %ebx = divisor
+	div	%ebx		/ %eax = quotient, %edx = remainder
+	mov	20(%esp),%ebx
+	mov	%eax,0(%ebx)	/ *qp = quotient
+	mov	24(%esp),%ebx
+	mov	%edx,0(%ebx)	/ *rp = remainder
+ xor %eax,%eax / return zero
+ pop %ebx
+ ret
+ nop
+
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s
new file mode 100644
index 0000000000..455792bbba
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_mips.s
@@ -0,0 +1,472 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <regdef.h>
+ .set noreorder
+ .set noat
+
+ .section .text, 1, 0x00000006, 4, 4
+.text:
+ .section .text
+
+ .ent s_mpv_mul_d_add
+ .globl s_mpv_mul_d_add
+
+s_mpv_mul_d_add:
+ #/* c += a * b */
+ #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.L.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.L.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.L.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.L.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.L.4
+ # a1 = a[1];
+ lwu a5,4(a0)
+ # }
+.L.3:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.L.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ b .L.6
+ addiu a3,a3,4
+ # } else {
+.L.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ b .L.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.L.2:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.L.6:
+ # c[1] = cy;
+ jr ra
+ sw t2,4(a3)
+ # }
+.L.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add
+
+ .ent s_mpv_mul_d_add_prop
+ .globl s_mpv_mul_d_add_prop
+
+s_mpv_mul_d_add_prop:
+ #/* c += a * b */
+ #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.M.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.M.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.M.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.M.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.M.4
+ # a1 = a[1];
+ lwu a5,4(a0)
+ # }
+.M.3:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.M.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ b .M.6
+ addiu a3,a3,8
+ # } else {
+.M.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ b .M.6
+ addiu a3,a3,4
+ # }
+ # } else {
+.M.2:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ addiu a3,a3,4
+ # }
+.M.6:
+
+ # while (cy) {
+ beq t2,zero,.M.1
+ nop
+.M.7:
+ # mp_word w = (mp_word)*c + cy;
+ lwu a6,0(a3)
+ daddu t2,t2,a6
+ # *c++ = ACCUM(w);
+ sw t2,0(a3)
+ # cy = CARRYOUT(w);
+ dsrl32 t2,t2,0
+ bne t2,zero,.M.7
+ addiu a3,a3,4
+
+ # }
+.M.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add_prop
+
+ .ent s_mpv_mul_d
+ .globl s_mpv_mul_d
+
+s_mpv_mul_d:
+ #/* c = a * b */
+ #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.N.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.N.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.N.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.N.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.N.4
+ # a1 = a[1];
+ lwu a5,4(a0)
+ # }
+.N.3:
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.N.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ b .N.6
+ addiu a3,a3,4
+ # } else {
+.N.5:
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ b .N.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.N.2:
+ mflo t0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.N.6:
+ # c[1] = cy;
+ jr ra
+ sw t2,4(a3)
+ # }
+.N.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d
+
+
+ .ent s_mpv_sqr_add_prop
+ .globl s_mpv_sqr_add_prop
+ #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
+ # registers
+ # a0 *a
+ # a1 a_len
+ # a2 *sqr
+ # a3 digit from *a, a_i
+ # a4 square of digit from a
+ # a5,a6 next 2 digits in sqr
+ # a7,t0 carry
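+	# Each 32-bit digit's square is a 64-bit value that is added into
+	# two consecutive words of sqrs, so the result pointer advances by
+	# 8 bytes per input digit.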
+s_mpv_sqr_add_prop:
+ move a7,zero
+ move t0,zero
+ lwu a3,0(a0)
+ addiu a1,a1,-1 # --a_len
+ dmultu a3,a3
+ beq a1,zero,.P.3 # jump if we've already done the only sqr
+ addiu a0,a0,4 # ++a
+.P.2:
+ lwu a5,0(a2)
+ lwu a6,4(a2)
+ addiu a2,a2,8 # sqrs += 2;
+ dsll32 a6,a6,0
+ daddu a5,a5,a6
+ lwu a3,0(a0)
+ addiu a0,a0,4 # ++a
+ mflo a4
+ daddu a6,a5,a4
+ sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
+ dmultu a3,a3
+ daddu a4,a6,t0
+ sltu t0,a4,a6
+ add t0,t0,a7
+ sw a4,-8(a2)
+ addiu a1,a1,-1 # --a_len
+ dsrl32 a4,a4,0
+ bne a1,zero,.P.2 # loop if a_len > 0
+ sw a4,-4(a2)
+.P.3:
+ lwu a5,0(a2)
+ lwu a6,4(a2)
+ addiu a2,a2,8 # sqrs += 2;
+ dsll32 a6,a6,0
+ daddu a5,a5,a6
+ mflo a4
+ daddu a6,a5,a4
+ sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
+ daddu a4,a6,t0
+ sltu t0,a4,a6
+ add t0,t0,a7
+ sw a4,-8(a2)
+ beq t0,zero,.P.9 # jump if no carry
+ dsrl32 a4,a4,0
+.P.8:
+ sw a4,-4(a2)
+ /* propagate final carry */
+ lwu a5,0(a2)
+ daddu a6,a5,t0
+ sltu t0,a6,a5
+ bne t0,zero,.P.8 # loop if carry persists
+ addiu a2,a2,4 # sqrs++
+.P.9:
+ jr ra
+ sw a4,-4(a2)
+
+ .end s_mpv_sqr_add_prop
diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c
new file mode 100644
index 0000000000..1e88357af8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sparc.c
@@ -0,0 +1,226 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Multiplication performance enhancements for sparc v8+vis CPUs. */
+
+#include "mpi-priv.h"
+#include <stddef.h>
+#include <sys/systeminfo.h>
+#include <strings.h>
+#include <string.h> /* memcpy */
+
+/*
+ * In the functions below, vector y must be 8-byte aligned and n must be
+ * even; the maximum n is 256. Each returns the carry out of the
+ * high-order word of the result.
+ */
+
+/* vector x += vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
+
+/* vector z = vector x + vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
+ int n, mp_digit a);
+
+/* v8 versions of these functions run on any Sparc v8 CPU. */
+
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ unsigned long long product = (unsigned long long)a * b; \
+ Plo = (mp_digit)product; \
+ Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
+ }
+
+/* c = a * b */
+static void
+v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD)
+ mp_digit d = 0;
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+ *c = d;
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ *c = carry;
+#endif
+}
+
+/* c += a * b */
+static void
+v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD)
+ mp_digit d = 0;
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+ *c = d;
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+ a0b0 += a_i = *c;
+ if (a0b0 < a_i)
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ *c = carry;
+#endif
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+static void
+v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD)
+ mp_digit d = 0;
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+
+ while (d) {
+ mp_word w = (mp_word)*c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+
+ a0b0 += a_i = *c;
+ if (a0b0 < a_i)
+ ++a1b1;
+
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ while (carry) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i;
+ }
+#endif
+}
+
+/* These functions run only on v8plus+vis or v9+vis CPUs. */
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit d;
+ mp_digit x[258];
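+    /*
+     * 258 digits = 256 (the maximum n) + 1 digit of zero padding for an
+     * odd a_len + 1 digit of slack so px can be bumped to 8-byte
+     * alignment.
+     */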
+ if (a_len <= 256) {
+ if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+ mp_digit *px;
+ px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ if (a_len & 1) {
+ px[a_len] = 0;
+ }
+ }
+ s_mp_setz(c, a_len + 1);
+ d = mul_add_inp(c, a, a_len, b);
+ c[a_len] = d;
+ } else {
+ v8_mpv_mul_d(a, a_len, b, c);
+ }
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit d;
+ mp_digit x[258];
+ if (a_len <= 256) {
+ if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+ mp_digit *px;
+ px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ if (a_len & 1) {
+ px[a_len] = 0;
+ }
+ }
+ d = mul_add_inp(c, a, a_len, b);
+ c[a_len] = d;
+ } else {
+ v8_mpv_mul_d_add(a, a_len, b, c);
+ }
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit d;
+ mp_digit x[258];
+ if (a_len <= 256) {
+ if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+ mp_digit *px;
+ px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ if (a_len & 1) {
+ px[a_len] = 0;
+ }
+ }
+ d = mul_add_inp(c, a, a_len, b);
+ if (d) {
+ c += a_len;
+ do {
+ mp_digit sum = d + *c;
+ *c++ = sum;
+ d = sum < d;
+ } while (d);
+ }
+ } else {
+ v8_mpv_mul_d_add_prop(a, a_len, b, c);
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s
new file mode 100644
index 0000000000..16a47019c3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sse2.s
@@ -0,0 +1,294 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef DARWIN
+#define s_mpv_mul_d _s_mpv_mul_d
+#define s_mpv_mul_d_add _s_mpv_mul_d_add
+#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
+#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
+#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
+#define TYPE_FUNCTION(x)
+#else
+#define TYPE_FUNCTION(x) .type x, @function
+#endif
+
+.text
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx:
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
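+	# The loops below keep a 64-bit accumulator in %mm2: pmuludq forms
+	# b * a[i], paddq folds in the carry (and, where used, *c), the low
+	# 32 bits are stored, and psrlq $32 leaves the next carry in %mm2.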
+.globl s_mpv_mul_d
+.private_extern s_mpv_mul_d
+TYPE_FUNCTION(s_mpv_mul_d)
+s_mpv_mul_d:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, 0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx:
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add
+.private_extern s_mpv_mul_d_add
+TYPE_FUNCTION(s_mpv_mul_d_add)
+s_mpv_mul_d_add:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ movd 0(%edi), %mm0
+ paddq %mm0, %mm2 # add the carry
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, 0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.private_extern s_mpv_mul_d_add_prop
+TYPE_FUNCTION(s_mpv_mul_d_add_prop)
+s_mpv_mul_d_add_prop:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ movd 0(%edi), %mm3 # fetch the sum
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ paddq %mm3, %mm2 # add *c++
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, %ebx
+ cmp $0, %ebx # is carry zero?
+ jz 4f
+ mov 0(%edi), %eax
+ add %ebx, %eax
+ stosl
+ jnc 4f
+3:
+ mov 0(%edi), %eax # add in current word from *c
+ adc $0, %eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 3b
+4:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
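+	# Each digit's square is a 64-bit value folded into two consecutive
+	# 32-bit words of ps, so %edi advances by 8 bytes per input digit.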
+.globl s_mpv_sqr_add_prop
+.private_extern s_mpv_sqr_add_prop
+TYPE_FUNCTION(s_mpv_sqr_add_prop)
+s_mpv_sqr_add_prop:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ mov 16(%ebp), %edi
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a
+ movd 0(%edi), %mm3 # fetch the sum
+ add $4, %esi
+ pmuludq %mm0, %mm0 # mm0 = sqr(a)
+ paddq %mm0, %mm2 # add the carry
+ paddq %mm3, %mm2 # add the low word
+ movd 4(%edi), %mm3
+ movd %mm2, 0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3, %mm2 # add the high word
+ movd %mm2, 4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8, %edi
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, %ebx
+ cmp $0, %ebx # is carry zero?
+ jz 4f
+ mov 0(%edi), %eax
+ add %ebx, %eax
+ stosl
+ jnc 4f
+3:
+ mov 0(%edi), %eax # add in current word from *c
+ adc $0, %eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 3b
+4:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+	# eax: Nlo, then the quotient
+	# ebx: the divisor, then qp, then rp
+	# edx: Nhi, then the remainder
+ #
+.globl s_mpv_div_2dx1d
+.private_extern s_mpv_div_2dx1d
+TYPE_FUNCTION(s_mpv_div_2dx1d)
+s_mpv_div_2dx1d:
+ push %ebx
+	mov 8(%esp), %edx	# %edx = Nhi
+	mov 12(%esp), %eax	# %eax = Nlo
+	mov 16(%esp), %ebx	# %ebx = divisor
+	div %ebx		# %eax = quotient, %edx = remainder
+	mov 20(%esp), %ebx
+	mov %eax, 0(%ebx)	# *qp = quotient
+	mov 24(%esp), %ebx
+	mov %edx, 0(%ebx)	# *rp = remainder
+ xor %eax, %eax # return zero
+ pop %ebx
+ ret
+ nop
+
+#ifndef DARWIN
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s
new file mode 100644
index 0000000000..8f7e2130c3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86.s
@@ -0,0 +1,541 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+	# -1 means to call s_mpi_is_sse2 to determine if we support sse2
+	# instructions.
+	#  0 means to use x86 instructions
+	#  1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
+
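+	#
+	# Equivalent C sketch of the dispatch each entry point below
+	# performs (illustrative only):
+	#
+	#   if (is_sse < 0)
+	#       is_sse = s_mpi_is_sse2();  /* detect once and memoize */
+	#   if (is_sse > 0)
+	#       ... use the sse2 body ...
+	#   else
+	#       ... use the x86 body ...
+	#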
+#
+# sigh, handle the difference between -fPIC and not PIC
+# default to pic, since this file seems to be exclusively
+# linux right now (solaris uses mpi_i86pc.s and windows uses
+# mpi_x86_asm.c)
+#
+.ifndef NO_PIC
+.macro GET var,reg
+ movl \var@GOTOFF(%ebx),\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var@GOTOFF(%ebx)
+.endm
+.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+.endif
+
+.text
+
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d
+.type s_mpv_mul_d,@function
+s_mpv_mul_d:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_mul_d_x86
+ jg s_mpv_mul_d_sse2
+ call s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add
+.type s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_mul_d_add_x86
+ jg s_mpv_mul_d_add_sse2
+ call s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+	# ebp - 36: caller's esi
+	# ebp - 32: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.type s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_mul_d_add_prop_x86
+ jg s_mpv_mul_d_add_prop_sse2
+ call s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 23f
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 27b
+28:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
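+	# Carry-propagation tail of the _prop variant (an illustrative sketch,
+	# not part of the build): instead of storing the final carry into
+	# c[a_len] the way s_mpv_mul_d_add does, labels 21/22 and 26/27 above
+	# ripple it through the higher words of c:
+	#
+	#     if (carry) {
+	#         mp_digit sum = *c + carry;
+	#         int cf = (sum < carry);   /* carry out of the addition */
+	#         *c++ = sum;
+	#         while (cf) {
+	#             sum = *c + 1;
+	#             cf = (sum == 0);      /* wrapped past the digit maximum */
+	#             *c++ = sum;
+	#         }
+	#     }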
+
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12:
+ # ebp - 8: carry
+ # ebp - 4: a_len local
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # ebp + 20:
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+
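+	# Illustrative C sketch of the x86 loop below (not part of the build;
+	# 32-bit mp_digit assumed). Each a_i contributes its 64-bit square to
+	# two consecutive words of ps, with a one-bit carry rippling onward:
+	#
+	#     mp_digit carry = 0;
+	#     while (a_len--) {
+	#         uint64_t w  = (uint64_t)*pa * *pa; pa++;
+	#         uint64_t lo = (w & 0xffffffff) + carry + ps[0];
+	#         uint64_t hi = (w >> 32) + (lo >> 32) + ps[1];
+	#         ps[0] = (mp_digit)lo;
+	#         ps[1] = (mp_digit)hi;
+	#         carry = (mp_digit)(hi >> 32);   /* 0 or 1 */
+	#         ps += 2;
+	#     }
+	#     /* followed by the same carry-propagation tail as the
+	#        mul_d_add_prop routine above */
+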
+.globl s_mpv_sqr_add_prop
+.type s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_sqr_add_prop_x86
+ jg s_mpv_sqr_add_prop_sse2
+ call s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:si]; si += 4;
+ mull %eax
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx
+ stosl # [es:di] = %eax; di += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx
+ stosl # [es:di] = %eax; di += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 37b
+38:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+	#   eax: Nlo, then quotient
+	#   ebx: scratch (divisor, then qp, then rp)
+	#   edx: Nhi, then remainder
+ #
+
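+	# Illustrative C equivalent (a sketch, not part of the build). The
+	# whole routine is the single 64/32-bit divide below; the x86 div
+	# instruction traps unless the quotient fits in 32 bits, i.e. unless
+	# Nhi < divisor, which the normalization precondition supports:
+	#
+	#     uint64_t N = ((uint64_t)Nhi << 32) | Nlo;
+	#     *qp = (mp_digit)(N / divisor);
+	#     *rp = (mp_digit)(N % divisor);
+	#     return 0;   /* MP_OKAY */
+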
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx
+ mov 12(%esp),%eax
+ mov 16(%esp),%ebx
+ div %ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx)
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx)
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
new file mode 100644
index 0000000000..4faeef30ca
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
@@ -0,0 +1,531 @@
+/*
+ * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+
+static int is_sse = -1;
+extern unsigned long s_mpi_is_sse2();
+
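+/*
+ * Dispatch sketch (illustrative, not a separate function in this file):
+ * each s_mpv_ entry point below starts with the equivalent of
+ *
+ *     if (is_sse < 0)               // CPU not probed yet
+ *         is_sse = s_mpi_is_sse2();
+ *     if (is_sse > 0) goto sse2_path; else goto x86_path;
+ *
+ * so the feature probe runs at most once and its result is cached.
+ */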
+/*
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_x86
+ jg s_mpv_mul_d_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_2 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_1:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_1 ; jmp if a_len != 0
+L_2:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_6 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_5:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_5 ; jmp if a_len != 0
+L_6:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_x86
+ jg s_mpv_mul_d_add_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_11 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_10:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_10 ; jmp if a_len != 0
+L_11:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_16 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_15:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd mm0, [edi]
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_15 ; jmp if a_len != 0
+L_16:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_prop_x86
+ jg s_mpv_mul_d_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_21 ; jmp if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = a
+L_20:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_20 ; jmp if a_len != 0
+L_21:
+ cmp ebx,0 ; is carry zero?
+ jz L_23
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_23
+L_22:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_22
+L_23:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_26 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_25:
+ movd mm0, [esi] ; mm0 = *a++
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add *c++
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_25 ; jmp if a_len != 0
+L_26:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_28
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_28
+L_27:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_27
+L_28:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * ebp - 20: caller's esi
+ * ebp - 16: caller's edi
+ * ebp - 12:
+ * ebp - 8: carry
+ * ebp - 4: a_len local
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: pa argument
+ * ebp + 12: a_len argument
+ * ebp + 16: ps argument
+ * ebp + 20:
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_sqr_add_prop_x86
+ jg s_mpv_sqr_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,12
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; a_len
+ mov edi,[ebp+16] ; edi = ps
+ cmp ecx,0
+ je L_31 ; jump if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = pa
+L_30:
+ lodsd ; eax = [ds:si]; si += 4;
+ mul eax
+
+ add eax,ebx ; add "carry"
+ adc edx,0
+ mov ebx,[edi]
+ add eax,ebx ; add low word from result
+ mov ebx,[edi+4]
+ stosd ; [es:di] = eax; di += 4;
+ adc edx,ebx ; add high word from result
+ mov ebx,0
+ mov eax,edx
+ adc ebx,0
+ stosd ; [es:di] = eax; di += 4;
+ dec ecx ; --a_len
+ jnz L_30 ; jmp if a_len != 0
+L_31:
+ cmp ebx,0 ; is carry zero?
+ jz L_34
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_34
+L_32:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_32
+L_34:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ mov edi, [ebp+16]
+ cmp ecx, 0
+ je L_36 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_35:
+ movd mm0, [esi] ; mm0 = *a
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm0 ; mm0 = sqr(a)
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add the low word
+ movd mm3, [edi+4]
+ movd [edi], mm2 ; store the 32bit result
+ psrlq mm2, 32
+ paddq mm2, mm3 ; add the high word
+ movd [edi+4], mm2 ; store the 32bit result
+ psrlq mm2, 32 ; save the carry.
+ add edi, 8
+ dec ecx ; --a_len
+ jnz L_35 ; jmp if a_len != 0
+L_36:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_38
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_38
+L_37:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_37
+L_38:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ * so its high bit is 1. This code is from NSPR.
+ *
+ * Dump of assembler code for function s_mpv_div_2dx1d:
+ *
+ * esp + 0: Caller's ebx
+ * esp + 4: return address
+ * esp + 8: Nhi argument
+ * esp + 12: Nlo argument
+ * esp + 16: divisor argument
+ * esp + 20: qp argument
+ * esp + 24: rp argument
+ * registers:
+ * eax: Nlo, then quotient
+ * ebx: scratch (divisor, then qp, then rp)
+ * edx: Nhi, then remainder
+ */
+__declspec(naked) mp_err
+ s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ __asm {
+ push ebx
+ mov edx,[esp+8]
+ mov eax,[esp+12]
+ mov ebx,[esp+16]
+ div ebx
+ mov ebx,[esp+20]
+ mov [ebx],eax
+ mov ebx,[esp+24]
+ mov [ebx],edx
+ xor eax,eax ; return zero
+ pop ebx
+ ret
+ nop
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 0000000000..b903e2564a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+	# -1 means to call _s_mpi_is_sse2 to determine if we support sse2
+	# instructions.
+ # 0 means to use x86 instructions
+ # 1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
+
+#
+# handle the difference between -fPIC and not PIC: this file is the
+# OS/2 variant of mpi_x86.s (Solaris uses mpi_i86pc.s and Windows uses
+# mpi_x86_asm.c), and the PIC macros below are left commented out in
+# favor of the non-PIC forms.
+#
+#.ifndef NO_PIC
+#.macro GET var,reg
+# movl \var@GOTOFF(%ebx),\reg
+#.endm
+#.macro PUT reg,var
+# movl \reg,\var@GOTOFF(%ebx)
+#.endm
+#.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+#.endif
+
+.text
+
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.globl _s_mpv_mul_d
+.type _s_mpv_mul_d,@function
+_s_mpv_mul_d:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_x86
+ jg _s_mpv_mul_d_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_sse2
+_s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.globl _s_mpv_mul_d_add
+.type _s_mpv_mul_d_add,@function
+_s_mpv_mul_d_add:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_x86
+ jg _s_mpv_mul_d_add_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_sse2
+_s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+	# ebp - 36: caller's esi
+	# ebp - 32: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.globl _s_mpv_mul_d_add_prop
+.type _s_mpv_mul_d_add_prop,@function
+_s_mpv_mul_d_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_prop_x86
+ jg _s_mpv_mul_d_add_prop_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_prop_sse2
+_s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 23f
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 27b
+28:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12:
+ # ebp - 8: carry
+ # ebp - 4: a_len local
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # ebp + 20:
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+
+.globl _s_mpv_sqr_add_prop
+.type _s_mpv_sqr_add_prop,@function
+_s_mpv_sqr_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_sqr_add_prop_x86
+ jg _s_mpv_sqr_add_prop_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_sqr_add_prop_sse2
+_s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:si]; si += 4;
+ mull %eax
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx
+ stosl # [es:di] = %eax; di += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx
+ stosl # [es:di] = %eax; di += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 37b
+38:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+	#   eax: Nlo, then quotient
+	#   ebx: scratch (divisor, then qp, then rp)
+	#   edx: Nhi, then remainder
+ #
+
+.globl _s_mpv_div_2dx1d
+.type _s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx
+ mov 12(%esp),%eax
+ mov 16(%esp),%ebx
+ div %ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx)
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx)
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+
diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c
new file mode 100644
index 0000000000..db19cff138
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.c
@@ -0,0 +1,460 @@
+/*
+ * mplogic.c
+ *
+ * Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/* {{{ Lookup table for population count */
+
+static unsigned char bitc[] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/* }}} */
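+/*
+ * Illustrative use of the table (a sketch, not part of this file): the
+ * population count of a 32-bit word, one byte at a time, exactly as the
+ * loops in mpl_num_set() below do per mp_digit:
+ *
+ *     unsigned popcount32(uint32_t w)
+ *     {
+ *         return bitc[w & 0xff] + bitc[(w >> 8) & 0xff] +
+ *                bitc[(w >> 16) & 0xff] + bitc[(w >> 24) & 0xff];
+ *     }
+ */
+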
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_not(a, b) - compute b = ~a
+ mpl_and(a, b, c) - compute c = a & b
+ mpl_or(a, b, c) - compute c = a | b
+ mpl_xor(a, b, c) - compute c = a ^ b
+ */
+
+/* {{{ mpl_not(a, b) */
+
+mp_err
+mpl_not(mp_int *a, mp_int *b)
+{
+ mp_err res;
+ unsigned int ix;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, b)) != MP_OKAY)
+ return res;
+
+ /* This relies on the fact that the digit type is unsigned */
+ for (ix = 0; ix < USED(b); ix++)
+ DIGIT(b, ix) = ~DIGIT(b, ix);
+
+ s_mp_clamp(b);
+
+ return MP_OKAY;
+
+} /* end mpl_not() */
+
+/* }}} */
+
+/* {{{ mpl_and(a, b, c) */
+
+mp_err
+mpl_and(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_int *which, *other;
+ mp_err res;
+ unsigned int ix;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (USED(a) <= USED(b)) {
+ which = a;
+ other = b;
+ } else {
+ which = b;
+ other = a;
+ }
+
+ if ((res = mp_copy(which, c)) != MP_OKAY)
+ return res;
+
+ for (ix = 0; ix < USED(which); ix++)
+ DIGIT(c, ix) &= DIGIT(other, ix);
+
+ s_mp_clamp(c);
+
+ return MP_OKAY;
+
+} /* end mpl_and() */
+
+/* }}} */
+
+/* {{{ mpl_or(a, b, c) */
+
+mp_err
+mpl_or(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_int *which, *other;
+ mp_err res;
+ unsigned int ix;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (USED(a) >= USED(b)) {
+ which = a;
+ other = b;
+ } else {
+ which = b;
+ other = a;
+ }
+
+ if ((res = mp_copy(which, c)) != MP_OKAY)
+ return res;
+
+ for (ix = 0; ix < USED(which); ix++)
+ DIGIT(c, ix) |= DIGIT(other, ix);
+
+ return MP_OKAY;
+
+} /* end mpl_or() */
+
+/* }}} */
+
+/* {{{ mpl_xor(a, b, c) */
+
+mp_err
+mpl_xor(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_int *which, *other;
+ mp_err res;
+ unsigned int ix;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (USED(a) >= USED(b)) {
+ which = a;
+ other = b;
+ } else {
+ which = b;
+ other = a;
+ }
+
+ if ((res = mp_copy(which, c)) != MP_OKAY)
+ return res;
+
+ for (ix = 0; ix < USED(which); ix++)
+ DIGIT(c, ix) ^= DIGIT(other, ix);
+
+ s_mp_clamp(c);
+
+ return MP_OKAY;
+
+} /* end mpl_xor() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_rsh(a, b, d) - b = a >> d
+ mpl_lsh(a, b, d) - b = a << d
+ */
+
+/* {{{ mpl_rsh(a, b, d) */
+
+mp_err
+mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, b)) != MP_OKAY)
+ return res;
+
+ s_mp_div_2d(b, d);
+
+ return MP_OKAY;
+
+} /* end mpl_rsh() */
+
+/* }}} */
+
+/* {{{ mpl_lsh(a, b, d) */
+
+mp_err
+mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ if ((res = mp_copy(a, b)) != MP_OKAY)
+ return res;
+
+ return s_mp_mul_2d(b, d);
+
+} /* end mpl_lsh() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_num_set(a, num)
+
+ Count the number of set bits in the binary representation of a.
+ Returns MP_OKAY and sets 'num' to be the number of such bits, if
+ possible. If num is NULL, the result is thrown away, but it is
+ not considered an error.
+
+ mpl_num_clear() does basically the same thing for clear bits.
+ */
+
+/* {{{ mpl_num_set(a, num) */
+
+mp_err
+mpl_num_set(mp_int *a, unsigned int *num)
+{
+ unsigned int ix, db, nset = 0;
+ mp_digit cur;
+ unsigned char reg;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = 0; ix < USED(a); ix++) {
+ cur = DIGIT(a, ix);
+
+ for (db = 0; db < sizeof(mp_digit); db++) {
+ reg = (unsigned char)(cur >> (CHAR_BIT * db));
+
+ nset += bitc[reg];
+ }
+ }
+
+ if (num)
+ *num = nset;
+
+ return MP_OKAY;
+
+} /* end mpl_num_set() */
+
+/* }}} */
+
+/* {{{ mpl_num_clear(a, num) */
+
+mp_err
+mpl_num_clear(mp_int *a, unsigned int *num)
+{
+ unsigned int ix, db, nset = 0;
+ mp_digit cur;
+ unsigned char reg;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = 0; ix < USED(a); ix++) {
+ cur = DIGIT(a, ix);
+
+ for (db = 0; db < sizeof(mp_digit); db++) {
+ reg = (unsigned char)(cur >> (CHAR_BIT * db));
+
+ nset += bitc[UCHAR_MAX - reg];
+ }
+ }
+
+ if (num)
+ *num = nset;
+
+ return MP_OKAY;
+
+} /* end mpl_num_clear() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_parity(a)
+
+ Determines the bitwise parity of the value given. Returns MP_EVEN
+   if an even number of bits are set, MP_ODD if an odd number are
+ set.
+ */
+
+/* {{{ mpl_parity(a) */
+
+mp_err
+mpl_parity(mp_int *a)
+{
+ unsigned int ix;
+ int par = 0;
+ mp_digit cur;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = 0; ix < USED(a); ix++) {
+ int shft = (sizeof(mp_digit) * CHAR_BIT) / 2;
+
+ cur = DIGIT(a, ix);
+
+ /* Compute parity for current digit */
+ while (shft != 0) {
+ cur ^= (cur >> shft);
+ shft >>= 1;
+ }
+ cur &= 1;
+
+ /* XOR with running parity so far */
+ par ^= cur;
+ }
+
+ if (par)
+ return MP_ODD;
+ else
+ return MP_EVEN;
+
+} /* end mpl_parity() */
+
+/* }}} */
+
+/*
+ mpl_set_bit
+
+ Returns MP_OKAY or some error code.
+ Grows a if needed to set a bit to 1.
+ */
+mp_err
+mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
+{
+ mp_size ix;
+ mp_err rv;
+ mp_digit mask;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ ix = bitNum / MP_DIGIT_BIT;
+ if (ix + 1 > MP_USED(a)) {
+ rv = s_mp_pad(a, ix + 1);
+ if (rv != MP_OKAY)
+ return rv;
+ }
+
+ bitNum = bitNum % MP_DIGIT_BIT;
+ mask = (mp_digit)1 << bitNum;
+ if (value)
+ MP_DIGIT(a, ix) |= mask;
+ else
+ MP_DIGIT(a, ix) &= ~mask;
+ s_mp_clamp(a);
+ return MP_OKAY;
+}
+
+/*
+ mpl_get_bit
+
+ returns 0 or 1 or some (negative) error code.
+ */
+mp_err
+mpl_get_bit(const mp_int *a, mp_size bitNum)
+{
+ mp_size bit, ix;
+ mp_err rv;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ ix = bitNum / MP_DIGIT_BIT;
+ ARGCHK(ix <= MP_USED(a) - 1, MP_RANGE);
+
+ bit = bitNum % MP_DIGIT_BIT;
+ rv = (mp_err)(MP_DIGIT(a, ix) >> bit) & 1;
+ return rv;
+}
+
+/*
+ mpl_get_bits
+ - Extracts numBits bits from a, where the least significant extracted bit
+   is bit lsbNum. Returns a negative value if an error occurs.
+ - Because the sign bit is used to indicate error, the maximum number of
+   bits that can be returned is the lesser of (a) the number of bits in an
+   mp_digit, or (b) one less than the number of bits in an mp_err.
+ - lsbNum + numBits can be greater than the number of significant bits in
+ integer a, as long as bit lsbNum is in the high order digit of a.
+ */
+mp_err
+mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits)
+{
+ mp_size rshift = (lsbNum % MP_DIGIT_BIT);
+ mp_size lsWndx = (lsbNum / MP_DIGIT_BIT);
+ mp_digit *digit = MP_DIGITS(a) + lsWndx;
+    mp_digit mask = (((mp_digit)1 << numBits) - 1); /* shift in mp_digit width, not int */
+
+ ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG);
+ ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE);
+
+ if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) ||
+ (lsWndx + 1 >= MP_USED(a))) {
+ mask &= (digit[0] >> rshift);
+ } else {
+ mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift)));
+ }
+ return (mp_err)mask;
+}
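+/*
+ * Worked example (illustrative, 32-bit digits): for a with digits
+ * { 0x9ABCDEF0, 0x12345678 }, i.e. a = 0x123456789ABCDEF0,
+ * mpl_get_bits(a, 28, 8) straddles the digit boundary and computes
+ * ((0x9ABCDEF0 >> 28) | (0x12345678 << 4)) & 0xff == 0x89.
+ */
+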
+
+#define LZCNTLOOP(i) \
+ do { \
+ x = d >> (i); \
+ mask = (0 - x); \
+ mask = (0 - (mask >> (MP_DIGIT_BIT - 1))); \
+ bits += (i)&mask; \
+ d ^= (x ^ d) & mask; \
+ } while (0)
+
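+/*
+ * How the macro works (illustrative): x = d >> (i) always fits below the
+ * digit's top bit, so (0 - x) has its top bit set exactly when x != 0;
+ * the two mask lines stretch that bit into an all-ones or all-zeros
+ * mask. The masked updates are then a branch-free
+ *
+ *     if (d >> i) { bits += i; d >>= i; }
+ *
+ * and applying it for i = 32, 16, 8, 4, 2, 1 binary-searches the
+ * position of the highest set bit without data-dependent branches.
+ */
+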
+/*
+ mpl_significant_bits
+ returns number of significant bits in abs(a).
+ In other words: floor(lg(abs(a))) + 1.
+ returns 1 if value is zero.
+ */
+mp_size
+mpl_significant_bits(const mp_int *a)
+{
+ /*
+    start bits at 1: a zero value returns 1, per the function's contract.
+ below does a binary search for the _position_ of the top bit set,
+ which is floor(lg(abs(a))) for a != 0.
+ */
+ mp_size bits = 1;
+ int ix;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = MP_USED(a); ix > 0;) {
+ mp_digit d, x, mask;
+ if ((d = MP_DIGIT(a, --ix)) == 0)
+ continue;
+#if !defined(MP_USE_UINT_DIGIT)
+ LZCNTLOOP(32);
+#endif
+ LZCNTLOOP(16);
+ LZCNTLOOP(8);
+ LZCNTLOOP(4);
+ LZCNTLOOP(2);
+ LZCNTLOOP(1);
+ break;
+ }
+ bits += ix * MP_DIGIT_BIT;
+ return bits;
+}
+
+#undef LZCNTLOOP
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h
new file mode 100644
index 0000000000..71b7551392
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.h
@@ -0,0 +1,55 @@
+/*
+ * mplogic.h
+ *
+ * Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPLOGIC_
+#define _H_MPLOGIC_
+
+#include "mpi.h"
+SEC_BEGIN_PROTOS
+
+/*
+ The logical operations treat an mp_int as if it were a bit vector,
+ without regard to its sign (an mp_int is represented in a signed
+ magnitude format). Values are treated as if they had an infinite
+ string of zeros left of the most-significant bit.
+ */
+
+/* Parity results */
+
+#define MP_EVEN MP_YES
+#define MP_ODD MP_NO
+
+/* Bitwise functions */
+
+mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */
+mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */
+mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */
+mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */
+
+/* Shift functions */
+
+mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */
+mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */
+
+/* Bit count and parity */
+
+mp_err mpl_num_set(mp_int *a, unsigned int *num); /* count set bits */
+mp_err mpl_num_clear(mp_int *a, unsigned int *num); /* count clear bits */
+mp_err mpl_parity(mp_int *a); /* determine parity */
+
+/* Get & Set the value of a bit */
+
+mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value);
+mp_err mpl_get_bit(const mp_int *a, mp_size bitNum);
+mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits);
+mp_size mpl_significant_bits(const mp_int *a);
+
+SEC_END_PROTOS
+
+#endif /* end _H_MPLOGIC_ */
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
new file mode 100644
index 0000000000..63842c6314
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -0,0 +1,1160 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file implements modular exponentiation using Montgomery's
+ * method for modular reduction, following the method described as
+ * "Improvement 2" in the paper "A Cryptographic Library for the
+ * Motorola DSP56000" by Stephen R. Dussé and Burton S. Kaliski Jr.,
+ * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90",
+ * Lecture Notes in Computer Science volume 473, 1991, pp. 230-244,
+ * Springer-Verlag.
+ */
+
+#define MP_USING_CACHE_SAFE_MOD_EXP 1
+#include <string.h>
+#include "mpi-priv.h"
+#include "mplogic.h"
+#include "mpprime.h"
+#ifdef MP_USING_MONT_MULF
+#include "montmulf.h"
+#endif
+#include <stddef.h> /* ptrdiff_t */
+#include <assert.h>
+
+#define STATIC
+
+#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
+
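+/*
+ * Montgomery reduction sketch (notation only, no new code): with
+ * n = MP_USED(N), R = RADIX**n and n0prime = -(N[0]**-1) mod RADIX
+ * (see mp_calculate_mont_n0i below), each pass of the loop in
+ * s_mp_redc picks m_i = T[i] * n0prime mod RADIX so that adding
+ * N * m_i * RADIX**i zeroes digit i of T. After n passes T is
+ * divisible by R, the digit shift computes T/R == T * R**-1 mod N,
+ * and one conditional subtraction brings the result below N.
+ */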
+/*! computes T = REDC(T), 2^b == R
+ \param T < RN
+*/
+mp_err
+s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
+{
+ mp_err res;
+ mp_size i;
+
+ i = (MP_USED(&mmm->N) << 1) + 1;
+ MP_CHECKOK(s_mp_pad(T, i));
+ for (i = 0; i < MP_USED(&mmm->N); ++i) {
+ mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
+ /* T += N * m_i * (MP_RADIX ** i); */
+ s_mp_mul_d_add_offset(&mmm->N, m_i, T, i);
+ }
+ s_mp_clamp(T);
+
+ /* T /= R */
+ s_mp_rshd(T, MP_USED(&mmm->N));
+
+ if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
+ /* T = T - N */
+ MP_CHECKOK(s_mp_sub(T, &mmm->N));
+#ifdef DEBUG
+ if ((res = mp_cmp(T, &mmm->N)) >= 0) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+#endif
+ }
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+}
+
+#if !defined(MP_MONT_USE_MP_MUL)
+
+/*! c <- REDC( a * b ) mod N
+ \param a < N i.e. "reduced"
+ \param b < N i.e. "reduced"
+ \param mmm modulus N and n0' of N
+*/
+mp_err
+s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+ mp_mont_modulus *mmm)
+{
+ mp_digit *pb;
+ mp_digit m_i;
+ mp_err res;
+ mp_size ib; /* "index b": index of current digit of B */
+ mp_size useda, usedb;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+ b = a;
+ a = xch;
+ }
+
+ MP_USED(c) = 1;
+ MP_DIGIT(c, 0) = 0;
+ ib = (MP_USED(&mmm->N) << 1) + 1;
+ if ((res = s_mp_pad(c, ib)) != MP_OKAY)
+ goto CLEANUP;
+
+ useda = MP_USED(a);
+ pb = MP_DIGITS(b);
+ s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
+ s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
+ m_i = MP_DIGIT(c, 0) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
+
+ /* Outer loop: Digits of b */
+ usedb = MP_USED(b);
+ for (ib = 1; ib < usedb; ib++) {
+ mp_digit b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ if (b_i)
+ s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+ m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+ }
+ if (usedb < MP_USED(&mmm->N)) {
+ for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) {
+ m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+ }
+ }
+ s_mp_clamp(c);
+ s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */
+ if (s_mp_cmp(c, &mmm->N) >= 0) {
+ MP_CHECKOK(s_mp_sub(c, &mmm->N));
+ }
+ res = MP_OKAY;
+
+CLEANUP:
+ return res;
+}
+#endif
+
+mp_err
+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
+{
+ mp_err res;
+
+ /* xMont = x * R mod N where N is modulus */
+ if (x != xMont) {
+ MP_CHECKOK(mp_copy(x, xMont));
+ }
+ MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
+ MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */
+CLEANUP:
+ return res;
+}
+
+mp_digit
+mp_calculate_mont_n0i(const mp_int *N)
+{
+ return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0));
+}
+
+#ifdef MP_USING_MONT_MULF
+
+/* the floating point multiply is already cache safe,
+ * don't turn on cache safe unless we specifically
+ * force it */
+#ifndef MP_FORCE_CACHE_SAFE
+#undef MP_USING_CACHE_SAFE_MOD_EXP
+#endif
+
+unsigned int mp_using_mont_mulf = 1;
+
+/* computes montgomery square of the integer in mResult */
+#define SQR \
+ conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
+ mont_mulf_noconv(mResult, dm1, d16Tmp, \
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* computes montgomery product of x and the integer in mResult */
+#define MUL(x) \
+ conv_i32_to_d32(dm1, mResult, nLen); \
+ mont_mulf_noconv(mResult, dm1, oddPowers[x], \
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
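+/*
+ * Window dispatch note (applies to the if/else chains below and to the
+ * integer versions later in this file): a w-bit window value written as
+ * e = 2**k * m with m odd is evaluated as (w - k) squarings, one
+ * multiply by oddPowers[(m - 1) / 2] == base**m, and then k more
+ * squarings, which is why only odd powers of the base are tabulated.
+ */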
+/* Do modular exponentiation using floating point multiply code. */
+mp_err
+mp_exptmod_f(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size odd_ints)
+{
+ mp_digit *mResult;
+ double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
+ double dn0;
+ mp_size i;
+ mp_err res;
+ int expOff;
+ int dSize = 0, oddPowSize, dTmpSize;
+ mp_int accum1;
+ double *oddPowers[MAX_ODD_INTS];
+
+ /* function for computing n0prime only works if n0 is odd */
+
+ MP_DIGITS(&accum1) = 0;
+
+ for (i = 0; i < MAX_ODD_INTS; ++i)
+ oddPowers[i] = 0;
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+
+ mp_set(&accum1, 1);
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+ MP_CHECKOK(s_mp_pad(&accum1, nLen));
+
+ oddPowSize = 2 * nLen + 1;
+ dTmpSize = 2 * oddPowSize;
+ dSize = sizeof(double) * (nLen * 4 + 1 +
+ ((odd_ints + 1) * oddPowSize) + dTmpSize);
+ dBuf = malloc(dSize);
+ if (!dBuf) {
+ res = MP_MEM;
+ goto CLEANUP;
+ }
+ dm1 = dBuf; /* array of d32 */
+ dn = dBuf + nLen; /* array of d32 */
+ dSqr = dn + nLen; /* array of d32 */
+ d16Tmp = dSqr + nLen; /* array of d16 */
+ dTmp = d16Tmp + oddPowSize;
+
+ for (i = 0; i < odd_ints; ++i) {
+ oddPowers[i] = dTmp;
+ dTmp += oddPowSize;
+ }
+ mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */
+
+ /* Make dn and dn0 */
+ conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
+ dn0 = (double)(mmm->n0prime & 0xffff);
+
+ /* Make dSqr */
+ conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
+ mont_mulf_noconv(mResult, dm1, oddPowers[0],
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+ conv_i32_to_d32(dSqr, mResult, nLen);
+
+ for (i = 1; i < odd_ints; ++i) {
+ mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+ conv_i32_to_d16(oddPowers[i], mResult, nLen);
+ }
+
+ s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */
+
+ for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ if (window_bits == 1) {
+ if (!smallExp) {
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ MUL(0);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 4) {
+ if (!smallExp) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 2);
+ } else if (smallExp & 2) {
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 4);
+ SQR;
+ } else if (smallExp & 4) {
+ SQR;
+ SQR;
+ MUL(smallExp / 8);
+ SQR;
+ SQR;
+ } else if (smallExp & 8) {
+ SQR;
+ MUL(smallExp / 16);
+ SQR;
+ SQR;
+ SQR;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 5) {
+ if (!smallExp) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 2);
+ } else if (smallExp & 2) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 4);
+ SQR;
+ } else if (smallExp & 4) {
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 8);
+ SQR;
+ SQR;
+ } else if (smallExp & 8) {
+ SQR;
+ SQR;
+ MUL(smallExp / 16);
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 0x10) {
+ SQR;
+ MUL(smallExp / 32);
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 6) {
+ if (!smallExp) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 2);
+ } else if (smallExp & 2) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 4);
+ SQR;
+ } else if (smallExp & 4) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 8);
+ SQR;
+ SQR;
+ } else if (smallExp & 8) {
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 16);
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 0x10) {
+ SQR;
+ SQR;
+ MUL(smallExp / 32);
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 0x20) {
+ SQR;
+ MUL(smallExp / 64);
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else {
+ abort();
+ }
+ } else {
+ abort();
+ }
+ }
+
+ s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */
+
+ res = s_mp_redc(&accum1, mmm);
+ mp_exch(&accum1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ if (dBuf) {
+ if (dSize)
+ memset(dBuf, 0, dSize);
+ free(dBuf);
+ }
+
+ return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+#define SQR(a, b) \
+ MP_CHECKOK(mp_sqr(a, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL(x, a, b) \
+ MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL(x, a, b) \
+ MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm))
+#endif
+
+#define SWAPPA \
+ ptmp = pa1; \
+ pa1 = pa2; \
+ pa2 = ptmp
+
+/* Do modular exponentiation using integer multiply code. */
+mp_err
+mp_exptmod_i(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size odd_ints)
+{
+ mp_int *pa1, *pa2, *ptmp;
+ mp_size i;
+ mp_err res;
+ int expOff;
+ mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS];
+
+    /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
+
+ MP_DIGITS(&accum1) = 0;
+ MP_DIGITS(&accum2) = 0;
+ MP_DIGITS(&power2) = 0;
+ for (i = 0; i < MAX_ODD_INTS; ++i) {
+ MP_DIGITS(oddPowers + i) = 0;
+ }
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+ MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase));
+
+ MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2));
+ MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */
+ MP_CHECKOK(s_mp_redc(&power2, mmm));
+
+ for (i = 1; i < odd_ints; ++i) {
+ MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
+ MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
+ MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
+ }
+
+ /* set accumulator to montgomery residue of 1 */
+ mp_set(&accum1, 1);
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+ pa1 = &accum1;
+ pa2 = &accum2;
+
+ for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ if (window_bits == 1) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ MUL(0, pa2, pa1);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 4) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 2, pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 4, pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 8, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 16, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 5) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 2, pa2, pa1);
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 4, pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 8, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 16, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 0x10) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 32, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 6) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 2, pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 4, pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 8, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 16, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 0x10) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 32, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 0x20) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 64, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else {
+ abort();
+ }
+ } else {
+ abort();
+ }
+ }
+
+ res = s_mp_redc(pa1, mmm);
+ mp_exch(pa1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ mp_clear(&accum2);
+ mp_clear(&power2);
+ for (i = 0; i < odd_ints; ++i) {
+ mp_clear(oddPowers + i);
+ }
+ return res;
+}
+#undef SQR
+#undef MUL
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+unsigned int mp_using_cache_safe_exp = 1;
+#endif
+
+mp_err
+mp_set_safe_modexp(int value)
+{
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ mp_using_cache_safe_exp = value;
+ return MP_OKAY;
+#else
+ if (value == 0) {
+ return MP_OKAY;
+ }
+ return MP_BADARG;
+#endif
+}
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+#define WEAVE_WORD_SIZE 4
+
+/*
+ * mpi_to_weave takes an array of bignums, a matrix in which each bignum
+ * occupies all the columns of a row, and transposes it into a matrix in
+ * which each bignum occupies a column of every row. The first row of the
+ * input matrix becomes the first column of the output matrix. The n'th
+ * row of input becomes the n'th column of output. The input data is said
+ * to be "interleaved" or "woven" into the output matrix.
+ *
+ * The array of bignums is left in this woven form. Each time a single
+ * bignum value is needed, it is recreated by fetching the n'th column,
+ * forming a single row which is the new bignum.
+ *
+ * The purpose of this interleaving is to make it impossible to determine which
+ * of the bignums is being used in any one operation by examining the pattern
+ * of cache misses.
+ *
+ * The weaving function does not transpose the entire input matrix in one call.
+ * It transposes 4 rows of mp_ints into their respective columns of output.
+ *
+ * This implementation treats each mp_int bignum as an array of mp_digits.
+ * It stores those digits as a column of mp_digits in the output matrix. It
+ * doesn't care if the machine uses big-endian or little-endian byte ordering
+ * within mp_digits.
+ *
+ * "bignums" is an array of mp_ints.
+ * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
+ *
+ * "weaved" is the weaved output matrix.
+ * The first digit of bignums[0] is stored in weaved[0].
+ *
+ * "nBignums" is the total number of bignums in the array of which "bignums"
+ * is a part.
+ *
+ * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
+ * mp_ints that use less than nDigits digits are logically padded with zeros
+ * while being stored in the weaved array.
+ */
+mp_err
+mpi_to_weave(const mp_int *bignums,
+ mp_digit *weaved,
+ mp_size nDigits, /* in each mp_int of input */
+ mp_size nBignums) /* in the entire source array */
+{
+ mp_size i;
+ mp_digit *endDest = weaved + (nDigits * nBignums);
+
+ for (i = 0; i < WEAVE_WORD_SIZE; i++) {
+ mp_size used = MP_USED(&bignums[i]);
+ mp_digit *pSrc = MP_DIGITS(&bignums[i]);
+ mp_digit *endSrc = pSrc + used;
+ mp_digit *pDest = weaved + i;
+
+ ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
+ ARGCHK(used <= nDigits, MP_BADARG);
+
+ for (; pSrc < endSrc; pSrc++) {
+ *pDest = *pSrc;
+ pDest += nBignums;
+ }
+ while (pDest < endDest) {
+ *pDest = 0;
+ pDest += nBignums;
+ }
+ }
+
+ return MP_OKAY;
+}
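+/*
+ * Tiny illustration (not code): with nBignums = 4 and nDigits = 3, the
+ * input rows A = a0 a1 a2, B = b0 b1 b2, C = c0 c1 c2, D = d0 d1 d2
+ * are stored column-wise as
+ *
+ *     weaved[] = { a0 b0 c0 d0, a1 b1 c1 d1, a2 b2 c2 d2 }
+ *
+ * so reconstructing any one bignum touches every four-digit group of
+ * the array, whichever column is wanted.
+ */
+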
+
+/*
+ * These functions return 0xffffffff if the output is true, and 0 otherwise.
+ */
+#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
+#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
+#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))
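+/*
+ * Why this is branch-free (illustrative): CONST_TIME_MSB(x)
+ * arithmetically replicates the top bit of x across the whole word.
+ * In CONST_TIME_EQ_Z, the top bit of ~(x) & ((x)-1) is set only for
+ * x == 0: when x's top bit is set, ~x clears it, and when x != 0 with
+ * a clear top bit, x - 1 also has a clear top bit. The result is
+ * all-ones iff x == 0, computed without any secret-dependent branch.
+ */
+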
+
+/* Reverse the operation above for one mp_int.
+ * Reconstruct one mp_int from its column in the weaved array.
+ * Every read accesses every element of the weaved array, in order to
+ * avoid timing attacks based on patterns of memory accesses.
+ */
+mp_err
+weave_to_mpi(mp_int *a, /* out, result */
+ const mp_digit *weaved, /* in, byte matrix */
+ mp_size index, /* which column to read */
+ mp_size nDigits, /* number of mp_digits in each bignum */
+ mp_size nBignums) /* width of the matrix */
+{
+ /* these are indices, but need to be the same size as mp_digit
+ * because of the CONST_TIME operations */
+ mp_digit i, j;
+ mp_digit d;
+ mp_digit *pDest = MP_DIGITS(a);
+
+ MP_SIGN(a) = MP_ZPOS;
+ MP_USED(a) = nDigits;
+
+ assert(weaved != NULL);
+
+ /* Fetch the proper column in constant time, indexing over the whole array */
+ for (i = 0; i < nDigits; ++i) {
+ d = 0;
+ for (j = 0; j < nBignums; ++j) {
+ d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
+ }
+ pDest[i] = d;
+ }
+
+ s_mp_clamp(a);
+ return MP_OKAY;
+}
+
+#define SQR(a, b) \
+ MP_CHECKOK(mp_sqr(a, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL_NOWEAVE(x, a, b) \
+ MP_CHECKOK(mp_mul(a, x, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL_NOWEAVE(x, a, b) \
+ MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm))
+#endif
+
+#define MUL(x, a, b) \
+ MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \
+ MUL_NOWEAVE(&tmp, a, b)
+
+#define SWAPPA \
+ ptmp = pa1; \
+ pa1 = pa2; \
+ pa2 = ptmp
+#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y)))
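+
+/* MP_ALIGN(x, y) rounds x up to the next multiple of y, for y a power of
+ * two; e.g. MP_ALIGN(0x1003, 16) == 0x1010 while MP_ALIGN(0x1010, 16)
+ * stays 0x1010. It is used below to align the weaved powers array. */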
+
+/* Do modular exponentiation using integer multiply code. */
+mp_err
+mp_exptmod_safe_i(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size num_powers)
+{
+ mp_int *pa1, *pa2, *ptmp;
+ mp_size i;
+ mp_size first_window;
+ mp_err res;
+ int expOff;
+ mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
+ mp_int tmp;
+ mp_digit *powersArray = NULL;
+ mp_digit *powers = NULL;
+
+ MP_DIGITS(&accum1) = 0;
+ MP_DIGITS(&accum2) = 0;
+ MP_DIGITS(&accum[0]) = 0;
+ MP_DIGITS(&accum[1]) = 0;
+ MP_DIGITS(&accum[2]) = 0;
+ MP_DIGITS(&accum[3]) = 0;
+ MP_DIGITS(&tmp) = 0;
+
+ /* grab the first window value. This allows us to preload accumulator1
+ * and save a conversion, some squares and a multiply */
+ MP_CHECKOK(mpl_get_bits(exponent,
+ bits_in_exponent - window_bits, window_bits));
+ first_window = (mp_size)res;
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+ /* build the first WEAVE_WORD powers inline */
+ /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
+ if (num_powers > 2) {
+ MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
+ mp_set(&accum[0], 1);
+ MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
+ MP_CHECKOK(mp_copy(montBase, &accum[1]));
+ SQR(montBase, &accum[2]);
+ MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
+ powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
+ if (!powersArray) {
+ res = MP_MEM;
+ goto CLEANUP;
+ }
+ /* powers[i] = base ** (i); */
+ powers = (mp_digit *)MP_ALIGN(powersArray, num_powers);
+ MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
+ if (first_window < 4) {
+ MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
+ first_window = num_powers;
+ }
+ } else {
+ if (first_window == 0) {
+ mp_set(&accum1, 1);
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+ } else {
+ /* window_bits == 1 here, so first_window must be 1 */
+ MP_CHECKOK(mp_copy(montBase, &accum1));
+ }
+ }
+
+ /*
+ * calculate all the powers in the powers array.
+ * this adds 2**(k-1)-2 square operations over just calculating the
+ * odd powers, where k is the window size, as is done in the two other
+ * mp_exptmod implementations in this file. We will get some of that
+ * back by not needing the first 'k' squares and one multiply for the
+ * first window.
+ * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
+ * num_powers > 2, in which case powers will have been allocated.
+ */
+ for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
+ int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */
+ if (i & 1) {
+ MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]);
+ /* we've filled the array; do our 'per array' processing */
+ if (acc_index == (WEAVE_WORD_SIZE - 1)) {
+ MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1),
+ nLen, num_powers));
+
+ if (first_window <= i) {
+ MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)],
+ &accum1));
+ first_window = num_powers;
+ }
+ }
+ } else {
+ /* up to i == 2*WEAVE_WORD_SIZE the value i/2 is still in the accum
+ * array, but at 8 our source and target slots are the same, so we
+ * need to copy first. After that, the value has been overwritten,
+ * so we need to fetch it from the stored weave array */
+ if (i > 2 * WEAVE_WORD_SIZE) {
+ MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers));
+ SQR(&accum2, &accum[acc_index]);
+ } else {
+ int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1);
+ if (half_power_index == acc_index) {
+ /* copy is cheaper than weave_to_mpi */
+ MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
+ SQR(&accum2, &accum[acc_index]);
+ } else {
+ SQR(&accum[half_power_index], &accum[acc_index]);
+ }
+ }
+ }
+ }
+/* If accum1 isn't set, then there is something wrong with our logic
+ * above; this is an internal programming error.
+ */
+#if MP_ARGCHK == 2
+ assert(MP_USED(&accum1) != 0);
+#endif
+
+ /* set up the two working accumulators; accum1 already holds the first window's value */
+ pa1 = &accum1;
+ pa2 = &accum2;
+
+ /* tmp is not used if window_bits == 1. */
+ if (window_bits != 1) {
+ MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2));
+ }
+
+ for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ /* hand-unrolled loops for each supported window size */
+ switch (window_bits) {
+ case 1:
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ MUL_NOWEAVE(montBase, pa2, pa1);
+ } else {
+ abort();
+ }
+ break;
+ case 6:
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ /* fall through */
+ case 4:
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp, pa1, pa2);
+ SWAPPA;
+ break;
+ case 5:
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp, pa2, pa1);
+ break;
+ default:
+ abort(); /* could do a loop? */
+ }
+ }
+
+ res = s_mp_redc(pa1, mmm);
+ mp_exch(pa1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ mp_clear(&accum2);
+ mp_clear(&accum[0]);
+ mp_clear(&accum[1]);
+ mp_clear(&accum[2]);
+ mp_clear(&accum[3]);
+ mp_clear(&tmp);
+ /* zero required by FIPS here, can't use PORT_ZFree
+ * because mpi doesn't link with util */
+ if (powers) {
+ PORT_Memset(powers, 0, num_powers * sizeof(mp_digit));
+ }
+ free(powersArray);
+ return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+mp_err
+mp_exptmod(const mp_int *inBase, const mp_int *exponent,
+ const mp_int *modulus, mp_int *result)
+{
+ const mp_int *base;
+ mp_size bits_in_exponent, i, window_bits, odd_ints;
+ mp_err res;
+ int nLen;
+ mp_int montBase, goodBase;
+ mp_mont_modulus mmm;
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ static unsigned int max_window_bits;
+#endif
+
+ /* function for computing n0prime only works if n0 is odd */
+ if (!mp_isodd(modulus))
+ return s_mp_exptmod(inBase, exponent, modulus, result);
+
+ if (mp_cmp_z(inBase) == MP_LT)
+ return MP_RANGE;
+ MP_DIGITS(&montBase) = 0;
+ MP_DIGITS(&goodBase) = 0;
+
+ if (mp_cmp(inBase, modulus) < 0) {
+ base = inBase;
+ } else {
+ MP_CHECKOK(mp_init(&goodBase));
+ base = &goodBase;
+ MP_CHECKOK(mp_mod(inBase, modulus, &goodBase));
+ }
+
+ nLen = MP_USED(modulus);
+ MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
+
+ mmm.N = *modulus; /* a copy of the mp_int struct */
+
+ /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
+ ** where n0 = least significant mp_digit of N, the modulus.
+ */
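+ /* Worked example with a tiny radix: if MP_RADIX were 16 and n0 = 3,
+ ** then n0**-1 mod 16 = 11 (3 * 11 = 33 = 2 * 16 + 1), so
+ ** n0' = -11 mod 16 = 5, and indeed n0 * n0' = 15 == -1 (mod 16).
+ */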
+ mmm.n0prime = mp_calculate_mont_n0i(modulus);
+
+ MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
+
+ bits_in_exponent = mpl_significant_bits(exponent);
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ if (mp_using_cache_safe_exp) {
+ if (bits_in_exponent > 780)
+ window_bits = 6;
+ else if (bits_in_exponent > 256)
+ window_bits = 5;
+ else if (bits_in_exponent > 20)
+ window_bits = 4;
+ /* RSA public key exponents are typically under 20 bits (common values
+ * are: 3, 17, 65537) and a 4-bit window is inefficient
+ */
+ else
+ window_bits = 1;
+ } else
+#endif
+ if (bits_in_exponent > 480)
+ window_bits = 6;
+ else if (bits_in_exponent > 160)
+ window_bits = 5;
+ else if (bits_in_exponent > 20)
+ window_bits = 4;
+ /* RSA public key exponents are typically under 20 bits (common values
+ * are: 3, 17, 65537) and a 4-bit window is inefficient
+ */
+ else
+ window_bits = 1;
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ /*
+ * clamp the window size based on
+ * the cache line size.
+ */
+ if (!max_window_bits) {
+ unsigned long cache_size = s_mpi_getProcessorLineSize();
+ /* processor has no cache, use 'fast' code always */
+ if (cache_size == 0) {
+ mp_using_cache_safe_exp = 0;
+ }
+ if ((cache_size == 0) || (cache_size >= 64)) {
+ max_window_bits = 6;
+ } else if (cache_size >= 32) {
+ max_window_bits = 5;
+ } else if (cache_size >= 16) {
+ max_window_bits = 4;
+ } else
+ max_window_bits = 1; /* should this be an assert? */
+ }
+
+ /* clamp the window size down before we round bits_in_exponent up to a multiple of it */
+ if (mp_using_cache_safe_exp) {
+ if (window_bits > max_window_bits) {
+ window_bits = max_window_bits;
+ }
+ }
+#endif
+
+ odd_ints = 1 << (window_bits - 1);
+ i = bits_in_exponent % window_bits;
+ if (i != 0) {
+ bits_in_exponent += window_bits - i;
+ }
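+
+ /* For example, a 2048-bit exponent with window_bits = 6 is padded to
+ * 2052 bits and processed as 342 windows; in the cache-safe path the
+ * first window preloads the accumulator, and each of the remaining
+ * 341 windows costs 6 squarings plus one multiply by a table entry. */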
+
+#ifdef MP_USING_MONT_MULF
+ if (mp_using_mont_mulf) {
+ MP_CHECKOK(s_mp_pad(&montBase, nLen));
+ res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, odd_ints);
+ } else
+#endif
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ if (mp_using_cache_safe_exp) {
+ res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, 1 << window_bits);
+ } else
+#endif
+ res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, odd_ints);
+
+CLEANUP:
+ mp_clear(&montBase);
+ mp_clear(&goodBase);
+ /* Don't mp_clear mmm.N because it is merely a copy of modulus.
+ ** Just zap it.
+ */
+ memset(&mmm, 0, sizeof mmm);
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c
new file mode 100644
index 0000000000..b757150e79
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.c
@@ -0,0 +1,610 @@
+/*
+ * mpprime.c
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
+
+#define RANDOM() rand()
+
+#include "primes.c" /* pull in the prime digit table */
+
+/*
+ Test if any of a given vector of digits divides a. If not, MP_NO
+ is returned; otherwise, MP_YES is returned and 'which' is set to
+ the index of the integer in the vector which divided a.
+ */
+mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
+
+/* {{{ mpp_divis(a, b) */
+
+/*
+ mpp_divis(a, b)
+
+ Returns MP_YES if a is divisible by b, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis(mp_int *a, mp_int *b)
+{
+ mp_err res;
+ mp_int rem;
+
+ if ((res = mp_init(&rem)) != MP_OKAY)
+ return res;
+
+ if ((res = mp_mod(a, b, &rem)) != MP_OKAY)
+ goto CLEANUP;
+
+ if (mp_cmp_z(&rem) == 0)
+ res = MP_YES;
+ else
+ res = MP_NO;
+
+CLEANUP:
+ mp_clear(&rem);
+ return res;
+
+} /* end mpp_divis() */
+
+/* }}} */
+
+/* {{{ mpp_divis_d(a, d) */
+
+/*
+ mpp_divis_d(a, d)
+
+ Return MP_YES if a is divisible by d, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_d(mp_int *a, mp_digit d)
+{
+ mp_err res;
+ mp_digit rem;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ if (d == 0)
+ return MP_NO;
+
+ if ((res = mp_mod_d(a, d, &rem)) != MP_OKAY)
+ return res;
+
+ if (rem == 0)
+ return MP_YES;
+ else
+ return MP_NO;
+
+} /* end mpp_divis_d() */
+
+/* }}} */
+
+/* {{{ mpp_random(a) */
+
+/*
+ mpp_random(a)
+
+ Assigns a random value to a. This value is generated using the
+ standard C library's rand() function, so it should not be used for
+ cryptographic purposes, but it should be fine for primality testing,
+ since all we really care about there is good statistical properties.
+
+ As many digits as a currently has are filled with random digits.
+ */
+
+mp_err
+mpp_random(mp_int *a)
+
+{
+ mp_digit next = 0;
+ unsigned int ix, jx;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = 0; ix < USED(a); ix++) {
+ for (jx = 0; jx < sizeof(mp_digit); jx++) {
+ next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
+ }
+ DIGIT(a, ix) = next;
+ }
+
+ return MP_OKAY;
+
+} /* end mpp_random() */
+
+/* }}} */
+
+static mpp_random_fn mpp_random_insecure = &mpp_random;
+
+/* {{{ mpp_random_size(a, prec) */
+
+mp_err
+mpp_random_size(mp_int *a, mp_size prec)
+{
+ mp_err res;
+
+ ARGCHK(a != NULL && prec > 0, MP_BADARG);
+
+ if ((res = s_mp_pad(a, prec)) != MP_OKAY)
+ return res;
+
+ return (*mpp_random_insecure)(a);
+
+} /* end mpp_random_size() */
+
+/* }}} */
+
+/* {{{ mpp_divis_vector(a, vec, size, which) */
+
+/*
+ mpp_divis_vector(a, vec, size, which)
+
+ Determines if a is divisible by any of the 'size' digits in vec.
+ Returns MP_YES and sets 'which' to the index of the offending digit,
+ if it is; returns MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+ ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
+
+ return s_mpp_divp(a, vec, size, which);
+
+} /* end mpp_divis_vector() */
+
+/* }}} */
+
+/* {{{ mpp_divis_primes(a, np) */
+
+/*
+ mpp_divis_primes(a, np)
+
+ Test whether a is divisible by any of the first 'np' primes. If it
+ is, returns MP_YES and sets *np to the value of the digit that did
+ it. If not, returns MP_NO.
+ */
+mp_err
+mpp_divis_primes(mp_int *a, mp_digit *np)
+{
+ int size, which;
+ mp_err res;
+
+ ARGCHK(a != NULL && np != NULL, MP_BADARG);
+
+ size = (int)*np;
+ if (size > prime_tab_size)
+ size = prime_tab_size;
+
+ res = mpp_divis_vector(a, prime_tab, size, &which);
+ if (res == MP_YES)
+ *np = prime_tab[which];
+
+ return res;
+
+} /* end mpp_divis_primes() */
+
+/* }}} */
+
+/* {{{ mpp_fermat(a, w) */
+
+/*
+ Using w as a witness, try pseudo-primality testing based on Fermat's
+ little theorem. If a is prime, and (w, a) = 1, then w^a == w (mod
+ a). So, we compute z = w^a (mod a) and compare z to w; if they are
+ equal, the test passes and we return MP_YES. Otherwise, we return
+ MP_NO.
+ */
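+
+/*
+ Worked example: for a = 7 and w = 2, 2^7 mod 7 = 128 mod 7 = 2 = w, so
+ the test passes. Note that Carmichael numbers (561 = 3*11*17 is the
+ smallest) pass this test for every witness coprime to them despite
+ being composite, which is why Miller-Rabin is used as a follow-up.
+ */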
+mp_err
+mpp_fermat(mp_int *a, mp_digit w)
+{
+ mp_int base, test;
+ mp_err res;
+
+ if ((res = mp_init(&base)) != MP_OKAY)
+ return res;
+
+ mp_set(&base, w);
+
+ if ((res = mp_init(&test)) != MP_OKAY)
+ goto TEST;
+
+ /* Compute test = base^a (mod a) */
+ if ((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY)
+ goto CLEANUP;
+
+ if (mp_cmp(&base, &test) == 0)
+ res = MP_YES;
+ else
+ res = MP_NO;
+
+CLEANUP:
+ mp_clear(&test);
+TEST:
+ mp_clear(&base);
+
+ return res;
+
+} /* end mpp_fermat() */
+
+/* }}} */
+
+/*
+ Perform the Fermat test on each of the primes in a list until
+ a) one of them shows a is not prime, or
+ b) the list is exhausted.
+ Returns: MP_YES if it passes the tests.
+ MP_NO if the Fermat test reveals it is composite.
+ Some MP error code if some other error occurs.
+ */
+mp_err
+mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
+{
+ mp_err rv = MP_YES;
+
+ while (nPrimes-- > 0 && rv == MP_YES) {
+ rv = mpp_fermat(a, *primes++);
+ }
+ return rv;
+}
+
+/* {{{ mpp_pprime(a, nt) */
+
+/*
+ mpp_pprime(a, nt)
+
+ Performs nt iterations of the Miller-Rabin probabilistic primality
+ test on a. Returns MP_YES if the tests pass, MP_NO if one fails.
+ If MP_NO is returned, the number is definitely composite. If MP_YES
+ is returned, it is probably prime (but that is not guaranteed).
+ */
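+
+/*
+ Worked example: a = 221 = 13 * 17 gives a - 1 = 220 = 2^2 * 55, so
+ b = 2 and m = 55 below. For x = 174: 174^55 mod 221 = 47, and
+ 47^2 mod 221 = 220 = a - 1, so this round passes (174 is a "strong
+ liar"). For x = 137: 137^55 mod 221 = 188, and 188^2 mod 221 = 205,
+ which is neither 1 nor a - 1, so 221 is proven composite. Each round
+ with a random x catches a composite with probability at least 3/4.
+ */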
+
+mp_err
+mpp_pprime(mp_int *a, int nt)
+{
+ return mpp_pprime_ext_random(a, nt, mpp_random_insecure);
+}
+
+mp_err
+mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random)
+{
+ mp_err res;
+ mp_int x, amo, m, z; /* "amo" = "a minus one" */
+ int iter;
+ unsigned int jx;
+ mp_size b;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ MP_DIGITS(&x) = 0;
+ MP_DIGITS(&amo) = 0;
+ MP_DIGITS(&m) = 0;
+ MP_DIGITS(&z) = 0;
+
+ /* Initialize temporaries... */
+ MP_CHECKOK(mp_init(&amo));
+ /* Compute amo = a - 1 for what follows... */
+ MP_CHECKOK(mp_sub_d(a, 1, &amo));
+
+ b = mp_trailing_zeros(&amo);
+ if (!b) { /* a was even ? */
+ res = MP_NO;
+ goto CLEANUP;
+ }
+
+ MP_CHECKOK(mp_init_size(&x, MP_USED(a)));
+ MP_CHECKOK(mp_init(&z));
+ MP_CHECKOK(mp_init(&m));
+ MP_CHECKOK(mp_div_2d(&amo, b, &m, 0));
+
+ /* Do the test nt times... */
+ for (iter = 0; iter < nt; iter++) {
+
+ /* Choose a random value for 1 < x < a */
+ MP_CHECKOK(s_mp_pad(&x, USED(a)));
+ MP_CHECKOK((*random)(&x));
+ MP_CHECKOK(mp_mod(&x, a, &x));
+ if (mp_cmp_d(&x, 1) <= 0) {
+ iter--; /* don't count this iteration */
+ continue; /* choose a new x */
+ }
+
+ /* Compute z = (x ** m) mod a */
+ MP_CHECKOK(mp_exptmod(&x, &m, a, &z));
+
+ if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
+ res = MP_YES;
+ continue;
+ }
+
+ res = MP_NO; /* just in case the following for loop never executes. */
+ for (jx = 1; jx < b; jx++) {
+ /* z = z^2 (mod a) */
+ MP_CHECKOK(mp_sqrmod(&z, a, &z));
+ res = MP_NO; /* previous line set res to MP_YES */
+
+ if (mp_cmp_d(&z, 1) == 0) {
+ break;
+ }
+ if (mp_cmp(&z, &amo) == 0) {
+ res = MP_YES;
+ break;
+ }
+ } /* end testing loop */
+
+ /* If the test passes, we will continue iterating, but a failed
+ test means the candidate is definitely NOT prime, so we will
+ immediately break out of this loop
+ */
+ if (res == MP_NO)
+ break;
+
+ } /* end iterations loop */
+
+CLEANUP:
+ mp_clear(&m);
+ mp_clear(&z);
+ mp_clear(&x);
+ mp_clear(&amo);
+ return res;
+
+} /* end mpp_pprime_ext_random() */
+
+/* }}} */
+
+/* Produce table of composites from list of primes and trial value.
+** trial must be odd. List of primes must not include 2.
+** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest
+** prime in list of primes. After this function is finished,
+** if sieve[i] is non-zero, then (trial + 2*i) is composite.
+** Each prime used in the sieve costs one division of trial, and eliminates
+** one or more values from the search space. (3 eliminates 1/3 of the values
+** alone!) Each value left in the search space costs 1 or more modular
+** exponentiations. So, these divisions are a bargain!
+*/
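+/*
+** Worked example: with trial = 101 and prime = 3, rem = 2 and offset = 1,
+** so the loop visits i = 1, 4, 7, 10, ... Only even i name a candidate
+** (trial + 2*(i/2)), so sieve[2] and sieve[5] are set, eliminating
+** 105 = 3*35 and 111 = 3*37 from the search space.
+*/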
+mp_err
+mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+ unsigned char *sieve, mp_size nSieve)
+{
+ mp_err res;
+ mp_digit rem;
+ mp_size ix;
+ unsigned long offset;
+
+ memset(sieve, 0, nSieve);
+
+ for (ix = 0; ix < nPrimes; ix++) {
+ mp_digit prime = primes[ix];
+ mp_size i;
+ if ((res = mp_mod_d(trial, prime, &rem)) != MP_OKAY)
+ return res;
+
+ if (rem == 0) {
+ offset = 0;
+ } else {
+ offset = prime - rem;
+ }
+
+ for (i = offset; i < nSieve * 2; i += prime) {
+ if (i % 2 == 0) {
+ sieve[i / 2] = 1;
+ }
+ }
+ }
+
+ return MP_OKAY;
+}
+
+#define SIEVE_SIZE (32 * 1024)
+
+mp_err
+mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong)
+{
+ return mpp_make_prime_ext_random(start, nBits, strong, mpp_random_insecure);
+}
+
+mp_err
+mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random)
+{
+ mp_digit np;
+ mp_err res;
+ unsigned int i = 0;
+ mp_int trial;
+ mp_int q;
+ mp_size num_tests;
+ unsigned char *sieve;
+
+ ARGCHK(start != 0, MP_BADARG);
+ ARGCHK(nBits > 16, MP_RANGE);
+
+ sieve = malloc(SIEVE_SIZE);
+ ARGCHK(sieve != NULL, MP_MEM);
+
+ MP_DIGITS(&trial) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_CHECKOK(mp_init(&trial));
+ MP_CHECKOK(mp_init(&q));
+ /* values originally taken from Table 4.4 of the
+ * Handbook of Applied Cryptography, augmented by FIPS 186
+ * requirements, Tables C.2 and C.3 */
+ if (nBits >= 2000) {
+ num_tests = 3;
+ } else if (nBits >= 1536) {
+ num_tests = 4;
+ } else if (nBits >= 1024) {
+ num_tests = 5;
+ } else if (nBits >= 550) {
+ num_tests = 6;
+ } else if (nBits >= 450) {
+ num_tests = 7;
+ } else if (nBits >= 400) {
+ num_tests = 8;
+ } else if (nBits >= 350) {
+ num_tests = 9;
+ } else if (nBits >= 300) {
+ num_tests = 10;
+ } else if (nBits >= 250) {
+ num_tests = 20;
+ } else if (nBits >= 200) {
+ num_tests = 41;
+ } else if (nBits >= 100) {
+ num_tests = 38; /* funny anomaly in the FIPS tables: for aux primes, they
+ * required more iterations for larger aux primes */
+ } else
+ num_tests = 50;
+
+ if (strong)
+ --nBits;
+ MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1));
+ MP_CHECKOK(mpl_set_bit(start, 0, 1));
+ for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
+ MP_CHECKOK(mpl_set_bit(start, i, 0));
+ }
+ /* start sieving with a prime value of 3. */
+ MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
+ sieve, SIEVE_SIZE));
+
+#ifdef DEBUG_SIEVE
+ res = 0;
+ for (i = 0; i < SIEVE_SIZE; ++i) {
+ if (!sieve[i])
+ ++res;
+ }
+ fprintf(stderr, "sieve found %d potential primes.\n", res);
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+ res = MP_NO;
+ for (i = 0; i < SIEVE_SIZE; ++i) {
+ if (sieve[i]) /* this number is composite */
+ continue;
+ MP_CHECKOK(mp_add_d(start, 2 * i, &trial));
+ FPUTC('.', stderr);
+ /* run a Fermat test */
+ res = mpp_fermat(&trial, 2);
+ if (res != MP_OKAY) {
+ if (res == MP_NO)
+ continue; /* was composite */
+ goto CLEANUP;
+ }
+
+ FPUTC('+', stderr);
+ /* If that passed, run some Miller-Rabin tests */
+ res = mpp_pprime_ext_random(&trial, num_tests, random);
+ if (res != MP_OKAY) {
+ if (res == MP_NO)
+ continue; /* was composite */
+ goto CLEANUP;
+ }
+ FPUTC('!', stderr);
+
+ if (!strong)
+ break; /* success !! */
+
+ /* At this point, we have strong evidence that our candidate
+ is itself prime. If we want a strong prime, we now need
+ to test q = 2p + 1 for primality...
+ */
+ MP_CHECKOK(mp_mul_2(&trial, &q));
+ MP_CHECKOK(mp_add_d(&q, 1, &q));
+
+ /* Test q for small prime divisors ... */
+ np = prime_tab_size;
+ res = mpp_divis_primes(&q, &np);
+ if (res == MP_YES) { /* is composite */
+ mp_clear(&q);
+ continue;
+ }
+ if (res != MP_NO)
+ goto CLEANUP;
+
+ /* And test with Fermat, as with its parent ... */
+ res = mpp_fermat(&q, 2);
+ if (res != MP_YES) {
+ mp_clear(&q);
+ if (res == MP_NO)
+ continue; /* was composite */
+ goto CLEANUP;
+ }
+
+ /* And test with Miller-Rabin, as with its parent ... */
+ res = mpp_pprime_ext_random(&q, num_tests, random);
+ if (res != MP_YES) {
+ mp_clear(&q);
+ if (res == MP_NO)
+ continue; /* was composite */
+ goto CLEANUP;
+ }
+
+ /* If it passed, we've got a winner */
+ mp_exch(&q, &trial);
+ mp_clear(&q);
+ break;
+
+ } /* end of loop through sieved values */
+ if (res == MP_YES)
+ mp_exch(&trial, start);
+CLEANUP:
+ mp_clear(&trial);
+ mp_clear(&q);
+ if (sieve != NULL) {
+ memset(sieve, 0, SIEVE_SIZE);
+ free(sieve);
+ }
+ return res;
+}
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static functions visible only to the library internally */
+
+/* {{{ s_mpp_divp(a, vec, size, which) */
+
+/*
+ Test for divisibility by members of a vector of digits. Returns
+ MP_NO if a is not divisible by any of them; returns MP_YES and sets
+ 'which' to the index of the offender, if it is. Will stop on the
+ first digit against which a is divisible.
+ */
+
+mp_err
+s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+ mp_err res;
+ mp_digit rem;
+
+ int ix;
+
+ for (ix = 0; ix < size; ix++) {
+ if ((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY)
+ return res;
+
+ if (rem == 0) {
+ if (which)
+ *which = ix;
+ return MP_YES;
+ }
+ }
+
+ return MP_NO;
+
+} /* end s_mpp_divp() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h
new file mode 100644
index 0000000000..0bdc6598ce
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.h
@@ -0,0 +1,48 @@
+/*
+ * mpprime.h
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MP_PRIME_
+#define _H_MP_PRIME_
+
+#include "mpi.h"
+
+SEC_BEGIN_PROTOS
+
+extern const int prime_tab_size; /* number of primes available */
+extern const mp_digit prime_tab[];
+
+/* Tests for divisibility */
+mp_err mpp_divis(mp_int *a, mp_int *b);
+mp_err mpp_divis_d(mp_int *a, mp_digit d);
+
+/* Random selection */
+mp_err mpp_random(mp_int *a);
+mp_err mpp_random_size(mp_int *a, mp_size prec);
+
+/* Type for a pointer to a user-provided mpp_random implementation */
+typedef mp_err (*mpp_random_fn)(mp_int *);
+
+/* Pseudo-primality testing */
+mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
+mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
+mp_err mpp_fermat(mp_int *a, mp_digit w);
+mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
+mp_err mpp_pprime(mp_int *a, int nt);
+mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+ unsigned char *sieve, mp_size nSieve);
+mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong);
+
+/* Pseudo-primality tests using a user-provided mpp_random implementation */
+mp_err mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random);
+mp_err mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random);
+
+SEC_END_PROTOS
+
+#endif /* end _H_MP_PRIME_ */
diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c
new file mode 100644
index 0000000000..423311b65b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparc.c
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "vis_proto.h"
+
+/***************************************************************/
+
+typedef int t_s32;
+typedef unsigned int t_u32;
+#if defined(__sparcv9)
+typedef long t_s64;
+typedef unsigned long t_u64;
+#else
+typedef long long t_s64;
+typedef unsigned long long t_u64;
+#endif
+typedef double t_d64;
+
+/***************************************************************/
+
+typedef union {
+ t_d64 d64;
+ struct {
+ t_s32 i0;
+ t_s32 i1;
+ } i32s;
+} d64_2_i32;
+
+/***************************************************************/
+
+#define BUFF_SIZE 256
+
+#define A_BITS 19
+#define A_MASK ((1 << A_BITS) - 1)
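+
+/* A 32-bit limb of y times a multiplier below 2**A_BITS needs at most
+ * 32 + 19 = 51 bits, so each partial product is exact in the 53-bit
+ * mantissa of an IEEE double. Larger multipliers are split into a
+ * 19-bit low half (da) and a 13-bit high half (db) and recombined with
+ * a shift by A_BITS in ADD_S64_U32_D. */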
+
+/***************************************************************/
+
+static t_u64 mask_cnst[] = {
+ 0x8000000080000000ull
+};
+
+/***************************************************************/
+
+#define DEF_VARS(N) \
+ t_d64 *py = (t_d64 *)y; \
+ t_d64 mask = *((t_d64 *)mask_cnst); \
+ t_d64 ca = (1u << 31) - 1; \
+ t_d64 da = (t_d64)a; \
+ t_s64 buff[N], s; \
+ d64_2_i32 dy
+
+/***************************************************************/
+
+#define MUL_U32_S64_2(i) \
+ dy.d64 = vis_fxnor(mask, py[i]); \
+ buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \
+ buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da
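+
+/* A note on the conversion trick above: fxnor with the 0x80000000 mask
+ * keeps the sign bit of each 32-bit limb and inverts its low 31 bits,
+ * so a limb with unsigned value v becomes a signed value r satisfying
+ * ca - r == v (with ca = 2**31 - 1). This converts unsigned 32-bit
+ * limbs to exact doubles using only the signed fitod path. */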
+
+#define MUL_U32_S64_2_D(i) \
+ dy.d64 = vis_fxnor(mask, py[i]); \
+ d0 = ca - (t_d64)dy.i32s.i0; \
+ d1 = ca - (t_d64)dy.i32s.i1; \
+ buff[4 * (i)] = (t_s64)(d0 * da); \
+ buff[4 * (i) + 1] = (t_s64)(d0 * db); \
+ buff[4 * (i) + 2] = (t_s64)(d1 * da); \
+ buff[4 * (i) + 3] = (t_s64)(d1 * db)
+
+/***************************************************************/
+
+#define ADD_S64_U32(i) \
+ s = buff[i] + x[i] + c; \
+ z[i] = s; \
+ c = (s >> 32)
+
+#define ADD_S64_U32_D(i) \
+ s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \
+ z[i] = s; \
+ uc = ((t_u64)s >> 32)
+
+/***************************************************************/
+
+#define MUL_U32_S64_8(i) \
+ MUL_U32_S64_2(i); \
+ MUL_U32_S64_2(i + 1); \
+ MUL_U32_S64_2(i + 2); \
+ MUL_U32_S64_2(i + 3)
+
+#define MUL_U32_S64_D_8(i) \
+ MUL_U32_S64_2_D(i); \
+ MUL_U32_S64_2_D(i + 1); \
+ MUL_U32_S64_2_D(i + 2); \
+ MUL_U32_S64_2_D(i + 3)
+
+/***************************************************************/
+
+#define ADD_S64_U32_8(i) \
+ ADD_S64_U32(i); \
+ ADD_S64_U32(i + 1); \
+ ADD_S64_U32(i + 2); \
+ ADD_S64_U32(i + 3); \
+ ADD_S64_U32(i + 4); \
+ ADD_S64_U32(i + 5); \
+ ADD_S64_U32(i + 6); \
+ ADD_S64_U32(i + 7)
+
+#define ADD_S64_U32_D_8(i) \
+ ADD_S64_U32_D(i); \
+ ADD_S64_U32_D(i + 1); \
+ ADD_S64_U32_D(i + 2); \
+ ADD_S64_U32_D(i + 3); \
+ ADD_S64_U32_D(i + 4); \
+ ADD_S64_U32_D(i + 5); \
+ ADD_S64_U32_D(i + 6); \
+ ADD_S64_U32_D(i + 7)
+
+/***************************************************************/
+
+t_u32
+mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+ if (a < (1 << A_BITS)) {
+
+ if (n == 8) {
+ DEF_VARS(8);
+ t_s32 c = 0;
+
+ MUL_U32_S64_8(0);
+ ADD_S64_U32_8(0);
+
+ return c;
+
+ } else if (n == 16) {
+ DEF_VARS(16);
+ t_s32 c = 0;
+
+ MUL_U32_S64_8(0);
+ MUL_U32_S64_8(4);
+ ADD_S64_U32_8(0);
+ ADD_S64_U32_8(8);
+
+ return c;
+
+ } else {
+ DEF_VARS(BUFF_SIZE);
+ t_s32 i, c = 0;
+
+#pragma pipeloop(0)
+ for (i = 0; i < (n + 1) / 2; i++) {
+ MUL_U32_S64_2(i);
+ }
+
+#pragma pipeloop(0)
+ for (i = 0; i < n; i++) {
+ ADD_S64_U32(i);
+ }
+
+ return c;
+ }
+ } else {
+
+ if (n == 8) {
+ DEF_VARS(2 * 8);
+ t_d64 d0, d1, db;
+ t_u32 uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+ MUL_U32_S64_D_8(0);
+ ADD_S64_U32_D_8(0);
+
+ return uc;
+
+ } else if (n == 16) {
+ DEF_VARS(2 * 16);
+ t_d64 d0, d1, db;
+ t_u32 uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+ MUL_U32_S64_D_8(0);
+ MUL_U32_S64_D_8(4);
+ ADD_S64_U32_D_8(0);
+ ADD_S64_U32_D_8(8);
+
+ return uc;
+
+ } else {
+ DEF_VARS(2 * BUFF_SIZE);
+ t_d64 d0, d1, db;
+ t_u32 i, uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+#pragma pipeloop(0)
+ for (i = 0; i < (n + 1) / 2; i++) {
+ MUL_U32_S64_2_D(i);
+ }
+
+#pragma pipeloop(0)
+ for (i = 0; i < n; i++) {
+ ADD_S64_U32_D(i);
+ }
+
+ return uc;
+ }
+ }
+}
+
+/***************************************************************/
+
+t_u32
+mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+ return mul_add(x, x, y, n, a);
+}
+
+/***************************************************************/
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
new file mode 100644
index 0000000000..66122a1d9d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
@@ -0,0 +1,1607 @@
+! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs.
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 14 ( 0 0) */ .align 8
+!
+! SUBROUTINE .L_const_seg_900000106
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000106: /* frequency 1.0 confidence 0.0 */
+/* 000000 19 ( 0 0) */ .word 1127219200,0
+/* 0x0008 20 ( 0 0) */ .word 1105199103,-4194304
+/* 0x0010 21 ( 0 0) */ .align 16
+/* 0x0010 27 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 29 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x0014 30 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 31 ( 1 2) */ xor %g1,-984,%g1
+/* 0x001c 32 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 33 ( 2 4) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000154
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 35 ( 0 2) */ call (.+0x8) ! params = ! Result =
+/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x002c 177 ( 2 3) */ sethi %hi(.L_const_seg_900000106),%g3
+/* 0x0030 178 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x0034 179 ( 3 4) */ or %g0,%i4,%o1
+/* 0x0038 180 ( 3 4) */ st %o1,[%fp+84]
+/* 0x003c 181 ( 3 4) */ add %g5,%o7,%o3
+/* 0x0040 182 ( 4 5) */ add %g3,%lo(.L_const_seg_900000106),%g3
+/* 0x0044 183 ( 4 6) */ ld [%o3+%g2],%g2
+/* 0x0048 184 ( 4 5) */ or %g0,%i3,%o2
+/* 0x004c 185 ( 5 6) */ sethi %hi(0x80000),%g4
+/* 0x0050 186 ( 5 7) */ ld [%o3+%g3],%o0
+/* 0x0054 187 ( 5 6) */ or %g0,%i2,%g5
+/* 0x0058 188 ( 6 7) */ or %g0,%o2,%o3
+/* 0x005c 189 ( 6 10) */ ldd [%g2],%f0
+/* 0x0060 190 ( 6 7) */ subcc %o1,%g4,%g0
+/* 0x0064 191 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50
+/* 0x0068 ( 7 8) */ subcc %o2,8,%g0
+/* 0x006c 193 ( 7 8) */ bne,pn %icc,.L77000037 ! tprob=0.50
+/* 0x0070 ( 8 12) */ ldd [%o0],%f8
+/* 0x0074 195 ( 9 13) */ ldd [%g5],%f4
+/* 0x0078 196 (10 14) */ ldd [%g5+8],%f6
+/* 0x007c 197 (11 15) */ ldd [%g5+16],%f10
+/* 0x0080 198 (11 14) */ fmovs %f8,%f12
+/* 0x0084 199 (12 16) */ fxnor %f0,%f4,%f4
+/* 0x0088 200 (12 14) */ ld [%fp+84],%f13
+/* 0x008c 201 (13 17) */ ldd [%o0+8],%f14
+/* 0x0090 202 (13 17) */ fxnor %f0,%f6,%f6
+/* 0x0094 203 (14 18) */ ldd [%g5+24],%f16
+/* 0x0098 204 (14 18) */ fxnor %f0,%f10,%f10
+/* 0x009c 208 (15 17) */ ld [%i1],%g2
+/* 0x00a0 209 (15 20) */ fsubd %f12,%f8,%f8
+/* 0x00a4 210 (16 21) */ fitod %f4,%f18
+/* 0x00a8 211 (16 18) */ ld [%i1+4],%g3
+/* 0x00ac 212 (17 22) */ fitod %f5,%f4
+/* 0x00b0 213 (17 19) */ ld [%i1+8],%g4
+/* 0x00b4 214 (18 23) */ fitod %f6,%f20
+/* 0x00b8 215 (18 20) */ ld [%i1+12],%g5
+/* 0x00bc 216 (19 21) */ ld [%i1+16],%o0
+/* 0x00c0 217 (19 24) */ fitod %f7,%f6
+/* 0x00c4 218 (20 22) */ ld [%i1+20],%o1
+/* 0x00c8 219 (20 24) */ fxnor %f0,%f16,%f16
+/* 0x00cc 220 (21 26) */ fsubd %f14,%f18,%f12
+/* 0x00d0 221 (21 23) */ ld [%i1+24],%o2
+/* 0x00d4 222 (22 27) */ fsubd %f14,%f4,%f4
+/* 0x00d8 223 (22 24) */ ld [%i1+28],%o3
+/* 0x00dc 224 (23 28) */ fitod %f10,%f18
+/* 0x00e0 225 (24 29) */ fsubd %f14,%f20,%f20
+/* 0x00e4 226 (25 30) */ fitod %f11,%f10
+/* 0x00e8 227 (26 31) */ fsubd %f14,%f6,%f6
+/* 0x00ec 228 (26 31) */ fmuld %f12,%f8,%f12
+/* 0x00f0 229 (27 32) */ fitod %f16,%f22
+/* 0x00f4 230 (27 32) */ fmuld %f4,%f8,%f4
+/* 0x00f8 231 (28 33) */ fsubd %f14,%f18,%f18
+/* 0x00fc 232 (29 34) */ fitod %f17,%f16
+/* 0x0100 233 (29 34) */ fmuld %f20,%f8,%f20
+/* 0x0104 234 (30 35) */ fsubd %f14,%f10,%f10
+/* 0x0108 235 (31 36) */ fdtox %f12,%f12
+/* 0x010c 236 (31 32) */ std %f12,[%sp+152]
+/* 0x0110 237 (31 36) */ fmuld %f6,%f8,%f6
+/* 0x0114 238 (32 37) */ fdtox %f4,%f4
+/* 0x0118 239 (32 33) */ std %f4,[%sp+144]
+/* 0x011c 240 (33 38) */ fsubd %f14,%f22,%f4
+/* 0x0120 241 (33 38) */ fmuld %f18,%f8,%f12
+/* 0x0124 242 (34 39) */ fdtox %f20,%f18
+/* 0x0128 243 (34 35) */ std %f18,[%sp+136]
+/* 0x012c 244 (35 37) */ ldx [%sp+152],%o4
+/* 0x0130 245 (35 40) */ fsubd %f14,%f16,%f14
+/* 0x0134 246 (35 40) */ fmuld %f10,%f8,%f10
+/* 0x0138 247 (36 41) */ fdtox %f6,%f6
+/* 0x013c 248 (36 37) */ std %f6,[%sp+128]
+/* 0x0140 249 (37 39) */ ldx [%sp+144],%o5
+/* 0x0144 250 (37 38) */ add %o4,%g2,%o4
+/* 0x0148 251 (38 39) */ st %o4,[%i0]
+/* 0x014c 252 (38 39) */ srax %o4,32,%g2
+/* 0x0150 253 (38 43) */ fdtox %f12,%f6
+/* 0x0154 254 (38 43) */ fmuld %f4,%f8,%f4
+/* 0x0158 255 (39 40) */ std %f6,[%sp+120]
+/* 0x015c 256 (39 40) */ add %o5,%g3,%g3
+/* 0x0160 257 (40 42) */ ldx [%sp+136],%o7
+/* 0x0164 258 (40 41) */ add %g3,%g2,%g2
+/* 0x0168 259 (40 45) */ fmuld %f14,%f8,%f6
+/* 0x016c 260 (40 45) */ fdtox %f10,%f8
+/* 0x0170 261 (41 42) */ std %f8,[%sp+112]
+/* 0x0174 262 (41 42) */ srax %g2,32,%o5
+/* 0x0178 263 (42 44) */ ldx [%sp+128],%g3
+/* 0x017c 264 (42 43) */ add %o7,%g4,%g4
+/* 0x0180 265 (43 44) */ st %g2,[%i0+4]
+/* 0x0184 266 (43 44) */ add %g4,%o5,%g4
+/* 0x0188 267 (43 48) */ fdtox %f4,%f4
+/* 0x018c 268 (44 46) */ ldx [%sp+120],%o5
+/* 0x0190 269 (44 45) */ add %g3,%g5,%g3
+/* 0x0194 270 (44 45) */ srax %g4,32,%g5
+/* 0x0198 271 (45 46) */ std %f4,[%sp+104]
+/* 0x019c 272 (45 46) */ add %g3,%g5,%g3
+/* 0x01a0 273 (45 50) */ fdtox %f6,%f4
+/* 0x01a4 274 (46 47) */ std %f4,[%sp+96]
+/* 0x01a8 275 (46 47) */ add %o5,%o0,%o0
+/* 0x01ac 276 (46 47) */ srax %g3,32,%o5
+/* 0x01b0 277 (47 49) */ ldx [%sp+112],%g5
+/* 0x01b4 278 (47 48) */ add %o0,%o5,%o0
+/* 0x01b8 279 (48 49) */ st %g4,[%i0+8]
+/* 0x01bc 280 (49 51) */ ldx [%sp+104],%o5
+/* 0x01c0 281 (49 50) */ add %g5,%o1,%o1
+/* 0x01c4 282 (49 50) */ srax %o0,32,%g5
+/* 0x01c8 283 (50 51) */ st %o0,[%i0+16]
+/* 0x01cc 284 (50 51) */ add %o1,%g5,%o1
+/* 0x01d0 285 (51 53) */ ldx [%sp+96],%g5
+/* 0x01d4 286 (51 52) */ add %o5,%o2,%o2
+/* 0x01d8 287 (51 52) */ srax %o1,32,%o5
+/* 0x01dc 288 (52 53) */ st %o1,[%i0+20]
+/* 0x01e0 289 (52 53) */ add %o2,%o5,%o2
+/* 0x01e4 290 (53 54) */ st %o2,[%i0+24]
+/* 0x01e8 291 (53 54) */ srax %o2,32,%g4
+/* 0x01ec 292 (53 54) */ add %g5,%o3,%g2
+/* 0x01f0 293 (54 55) */ st %g3,[%i0+12]
+/* 0x01f4 294 (54 55) */ add %g2,%g4,%g2
+/* 0x01f8 295 (55 56) */ st %g2,[%i0+28]
+/* 0x01fc 299 (55 56) */ srax %g2,32,%o7
+/* 0x0200 300 (56 57) */ or %g0,%o7,%i0
+/* 0x0204 (57 64) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0208 (59 61) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000037
+!
+
+ .L77000037: /* frequency 1.0 confidence 0.0 */
+/* 0x020c 307 ( 0 1) */ subcc %o2,16,%g0
+/* 0x0210 308 ( 0 1) */ bne,pn %icc,.L77000076 ! tprob=0.50
+/* 0x0214 ( 1 5) */ ldd [%o0],%f8
+/* 0x0218 310 ( 2 6) */ ldd [%g5],%f4
+/* 0x021c 311 ( 3 7) */ ldd [%g5+8],%f6
+/* 0x0220 317 ( 4 8) */ ldd [%o0+8],%f14
+/* 0x0224 318 ( 4 7) */ fmovs %f8,%f12
+/* 0x0228 319 ( 5 7) */ ld [%fp+84],%f13
+/* 0x022c 320 ( 5 9) */ fxnor %f0,%f4,%f4
+/* 0x0230 321 ( 6 10) */ ldd [%g5+16],%f10
+/* 0x0234 322 ( 6 10) */ fxnor %f0,%f6,%f6
+/* 0x0238 323 ( 7 11) */ ldd [%g5+24],%f16
+/* 0x023c 324 ( 8 12) */ ldd [%g5+32],%f20
+/* 0x0240 325 ( 8 13) */ fsubd %f12,%f8,%f8
+/* 0x0244 331 ( 9 11) */ ld [%i1+40],%o7
+/* 0x0248 332 ( 9 14) */ fitod %f4,%f18
+/* 0x024c 333 (10 14) */ ldd [%g5+40],%f22
+/* 0x0250 334 (10 15) */ fitod %f5,%f4
+/* 0x0254 335 (11 12) */ stx %o7,[%sp+96]
+/* 0x0258 336 (11 16) */ fitod %f6,%f24
+/* 0x025c 337 (12 14) */ ld [%i1+44],%o7
+/* 0x0260 338 (12 16) */ fxnor %f0,%f10,%f10
+/* 0x0264 339 (13 17) */ ldd [%g5+48],%f26
+/* 0x0268 340 (13 18) */ fitod %f7,%f6
+/* 0x026c 341 (14 15) */ stx %o7,[%sp+104]
+/* 0x0270 342 (14 19) */ fsubd %f14,%f18,%f18
+/* 0x0274 343 (15 17) */ ld [%i1+48],%o7
+/* 0x0278 344 (15 20) */ fsubd %f14,%f4,%f4
+/* 0x027c 345 (16 18) */ ld [%i1+36],%o5
+/* 0x0280 346 (16 21) */ fitod %f10,%f28
+/* 0x0284 347 (17 18) */ stx %o7,[%sp+112]
+/* 0x0288 348 (17 21) */ fxnor %f0,%f16,%f16
+/* 0x028c 349 (18 20) */ ld [%i1],%g2
+/* 0x0290 350 (18 23) */ fsubd %f14,%f24,%f24
+/* 0x0294 351 (19 20) */ stx %o5,[%sp+120]
+/* 0x0298 352 (19 24) */ fitod %f11,%f10
+/* 0x029c 353 (19 24) */ fmuld %f18,%f8,%f18
+/* 0x02a0 354 (20 22) */ ld [%i1+52],%o5
+/* 0x02a4 355 (20 25) */ fsubd %f14,%f6,%f6
+/* 0x02a8 356 (20 25) */ fmuld %f4,%f8,%f4
+/* 0x02ac 357 (21 26) */ fitod %f16,%f30
+/* 0x02b0 358 (22 26) */ fxnor %f0,%f20,%f20
+/* 0x02b4 359 (22 24) */ ld [%i1+4],%g3
+/* 0x02b8 360 (23 27) */ ldd [%g5+56],%f2
+/* 0x02bc 361 (23 28) */ fsubd %f14,%f28,%f28
+/* 0x02c0 362 (23 28) */ fmuld %f24,%f8,%f24
+/* 0x02c4 363 (24 25) */ stx %o5,[%sp+128]
+/* 0x02c8 364 (24 29) */ fdtox %f18,%f18
+/* 0x02cc 365 (25 26) */ std %f18,[%sp+272]
+/* 0x02d0 366 (25 30) */ fitod %f17,%f16
+/* 0x02d4 367 (25 30) */ fmuld %f6,%f8,%f6
+/* 0x02d8 368 (26 31) */ fsubd %f14,%f10,%f10
+/* 0x02dc 369 (27 32) */ fitod %f20,%f18
+/* 0x02e0 370 (28 33) */ fdtox %f4,%f4
+/* 0x02e4 371 (28 29) */ std %f4,[%sp+264]
+/* 0x02e8 372 (28 33) */ fmuld %f28,%f8,%f28
+/* 0x02ec 373 (29 31) */ ld [%i1+8],%g4
+/* 0x02f0 374 (29 34) */ fsubd %f14,%f30,%f4
+/* 0x02f4 375 (30 34) */ fxnor %f0,%f22,%f22
+/* 0x02f8 376 (30 32) */ ld [%i1+12],%g5
+/* 0x02fc 377 (31 33) */ ld [%i1+16],%o0
+/* 0x0300 378 (31 36) */ fitod %f21,%f20
+/* 0x0304 379 (31 36) */ fmuld %f10,%f8,%f10
+/* 0x0308 380 (32 34) */ ld [%i1+20],%o1
+/* 0x030c 381 (32 37) */ fdtox %f24,%f24
+/* 0x0310 382 (33 34) */ std %f24,[%sp+256]
+/* 0x0314 383 (33 38) */ fsubd %f14,%f16,%f16
+/* 0x0318 384 (34 36) */ ldx [%sp+272],%o7
+/* 0x031c 385 (34 39) */ fdtox %f6,%f6
+/* 0x0320 386 (34 39) */ fmuld %f4,%f8,%f4
+/* 0x0324 387 (35 36) */ std %f6,[%sp+248]
+/* 0x0328 388 (35 40) */ fitod %f22,%f24
+/* 0x032c 389 (36 38) */ ld [%i1+32],%o4
+/* 0x0330 390 (36 41) */ fsubd %f14,%f18,%f6
+/* 0x0334 391 (36 37) */ add %o7,%g2,%g2
+/* 0x0338 392 (37 39) */ ldx [%sp+264],%o7
+/* 0x033c 393 (37 41) */ fxnor %f0,%f26,%f26
+/* 0x0340 394 (37 38) */ srax %g2,32,%o5
+/* 0x0344 395 (38 39) */ st %g2,[%i0]
+/* 0x0348 396 (38 43) */ fitod %f23,%f18
+/* 0x034c 397 (38 43) */ fmuld %f16,%f8,%f16
+/* 0x0350 398 (39 41) */ ldx [%sp+248],%g2
+/* 0x0354 399 (39 44) */ fdtox %f28,%f22
+/* 0x0358 400 (39 40) */ add %o7,%g3,%g3
+/* 0x035c 401 (40 42) */ ldx [%sp+256],%o7
+/* 0x0360 402 (40 45) */ fsubd %f14,%f20,%f20
+/* 0x0364 403 (40 41) */ add %g3,%o5,%g3
+/* 0x0368 404 (41 42) */ std %f22,[%sp+240]
+/* 0x036c 405 (41 46) */ fitod %f26,%f22
+/* 0x0370 406 (41 42) */ srax %g3,32,%o5
+/* 0x0374 407 (41 42) */ add %g2,%g5,%g2
+/* 0x0378 408 (42 43) */ st %g3,[%i0+4]
+/* 0x037c 409 (42 47) */ fdtox %f10,%f10
+/* 0x0380 410 (42 43) */ add %o7,%g4,%g4
+/* 0x0384 411 (42 47) */ fmuld %f6,%f8,%f6
+/* 0x0388 412 (43 44) */ std %f10,[%sp+232]
+/* 0x038c 413 (43 47) */ fxnor %f0,%f2,%f12
+/* 0x0390 414 (43 44) */ add %g4,%o5,%g4
+/* 0x0394 415 (44 45) */ st %g4,[%i0+8]
+/* 0x0398 416 (44 45) */ srax %g4,32,%o5
+/* 0x039c 417 (44 49) */ fsubd %f14,%f24,%f10
+/* 0x03a0 418 (45 47) */ ldx [%sp+240],%o7
+/* 0x03a4 419 (45 50) */ fdtox %f4,%f4
+/* 0x03a8 420 (45 46) */ add %g2,%o5,%g2
+/* 0x03ac 421 (45 50) */ fmuld %f20,%f8,%f20
+/* 0x03b0 422 (46 47) */ std %f4,[%sp+224]
+/* 0x03b4 423 (46 47) */ srax %g2,32,%g5
+/* 0x03b8 424 (46 51) */ fsubd %f14,%f18,%f4
+/* 0x03bc 425 (47 48) */ st %g2,[%i0+12]
+/* 0x03c0 426 (47 52) */ fitod %f27,%f24
+/* 0x03c4 427 (47 48) */ add %o7,%o0,%g3
+/* 0x03c8 428 (48 50) */ ldx [%sp+232],%o5
+/* 0x03cc 429 (48 53) */ fdtox %f16,%f16
+/* 0x03d0 430 (48 49) */ add %g3,%g5,%g2
+/* 0x03d4 431 (49 50) */ std %f16,[%sp+216]
+/* 0x03d8 432 (49 50) */ srax %g2,32,%g4
+/* 0x03dc 433 (49 54) */ fitod %f12,%f18
+/* 0x03e0 434 (49 54) */ fmuld %f10,%f8,%f10
+/* 0x03e4 435 (50 51) */ st %g2,[%i0+16]
+/* 0x03e8 436 (50 55) */ fsubd %f14,%f22,%f16
+/* 0x03ec 437 (50 51) */ add %o5,%o1,%g2
+/* 0x03f0 438 (51 53) */ ld [%i1+24],%o2
+/* 0x03f4 439 (51 56) */ fitod %f13,%f12
+/* 0x03f8 440 (51 52) */ add %g2,%g4,%g2
+/* 0x03fc 441 (51 56) */ fmuld %f4,%f8,%f22
+/* 0x0400 442 (52 54) */ ldx [%sp+224],%g3
+/* 0x0404 443 (52 53) */ srax %g2,32,%g4
+/* 0x0408 444 (52 57) */ fdtox %f6,%f6
+/* 0x040c 445 (53 54) */ std %f6,[%sp+208]
+/* 0x0410 446 (53 58) */ fdtox %f20,%f6
+/* 0x0414 447 (54 55) */ stx %o4,[%sp+136]
+/* 0x0418 448 (54 59) */ fsubd %f14,%f24,%f4
+/* 0x041c 449 (55 56) */ std %f6,[%sp+200]
+/* 0x0420 450 (55 60) */ fsubd %f14,%f18,%f6
+/* 0x0424 451 (55 60) */ fmuld %f16,%f8,%f16
+/* 0x0428 452 (56 57) */ st %g2,[%i0+20]
+/* 0x042c 453 (56 57) */ add %g3,%o2,%g2
+/* 0x0430 454 (56 61) */ fdtox %f10,%f10
+/* 0x0434 455 (57 59) */ ld [%i1+28],%o3
+/* 0x0438 456 (57 58) */ add %g2,%g4,%g2
+/* 0x043c 457 (58 60) */ ldx [%sp+216],%g5
+/* 0x0440 458 (58 59) */ srax %g2,32,%g4
+/* 0x0444 459 (59 60) */ std %f10,[%sp+192]
+/* 0x0448 460 (59 64) */ fsubd %f14,%f12,%f10
+/* 0x044c 461 (59 64) */ fmuld %f4,%f8,%f4
+/* 0x0450 462 (60 61) */ st %g2,[%i0+24]
+/* 0x0454 463 (60 61) */ add %g5,%o3,%g2
+/* 0x0458 464 (60 65) */ fdtox %f22,%f12
+/* 0x045c 465 (60 65) */ fmuld %f6,%f8,%f6
+/* 0x0460 466 (61 63) */ ldx [%sp+136],%o0
+/* 0x0464 467 (61 62) */ add %g2,%g4,%g2
+/* 0x0468 468 (62 64) */ ldx [%sp+208],%g3
+/* 0x046c 469 (62 63) */ srax %g2,32,%g4
+/* 0x0470 470 (63 65) */ ldx [%sp+120],%o1
+/* 0x0474 471 (64 66) */ ldx [%sp+200],%g5
+/* 0x0478 472 (64 65) */ add %g3,%o0,%g3
+/* 0x047c 473 (64 69) */ fdtox %f4,%f4
+/* 0x0480 474 (64 69) */ fmuld %f10,%f8,%f8
+/* 0x0484 475 (65 66) */ std %f12,[%sp+184]
+/* 0x0488 476 (65 66) */ add %g3,%g4,%g3
+/* 0x048c 477 (65 70) */ fdtox %f16,%f12
+/* 0x0490 478 (66 67) */ std %f12,[%sp+176]
+/* 0x0494 479 (66 67) */ srax %g3,32,%o0
+/* 0x0498 480 (66 67) */ add %g5,%o1,%g5
+/* 0x049c 481 (67 69) */ ldx [%sp+192],%o2
+/* 0x04a0 482 (67 68) */ add %g5,%o0,%g5
+/* 0x04a4 483 (68 70) */ ldx [%sp+96],%g4
+/* 0x04a8 484 (68 69) */ srax %g5,32,%o1
+/* 0x04ac 485 (69 71) */ ld [%i1+56],%o4
+/* 0x04b0 486 (70 72) */ ldx [%sp+104],%o0
+/* 0x04b4 487 (70 71) */ add %o2,%g4,%g4
+/* 0x04b8 488 (71 72) */ std %f4,[%sp+168]
+/* 0x04bc 489 (71 72) */ add %g4,%o1,%g4
+/* 0x04c0 490 (71 76) */ fdtox %f6,%f4
+/* 0x04c4 491 (72 74) */ ldx [%sp+184],%o3
+/* 0x04c8 492 (72 73) */ srax %g4,32,%o2
+/* 0x04cc 493 (73 75) */ ldx [%sp+112],%o1
+/* 0x04d0 494 (74 75) */ std %f4,[%sp+160]
+/* 0x04d4 495 (74 75) */ add %o3,%o0,%o0
+/* 0x04d8 496 (74 79) */ fdtox %f8,%f4
+/* 0x04dc 497 (75 77) */ ldx [%sp+176],%o5
+/* 0x04e0 498 (75 76) */ add %o0,%o2,%o0
+/* 0x04e4 499 (76 77) */ stx %o4,[%sp+144]
+/* 0x04e8 500 (77 78) */ st %g2,[%i0+28]
+/* 0x04ec 501 (77 78) */ add %o5,%o1,%g2
+/* 0x04f0 502 (77 78) */ srax %o0,32,%o1
+/* 0x04f4 503 (78 79) */ std %f4,[%sp+152]
+/* 0x04f8 504 (78 79) */ add %g2,%o1,%o1
+/* 0x04fc 505 (79 81) */ ldx [%sp+168],%o7
+/* 0x0500 506 (79 80) */ srax %o1,32,%o3
+/* 0x0504 507 (80 82) */ ldx [%sp+128],%o2
+/* 0x0508 508 (81 83) */ ld [%i1+60],%o4
+/* 0x050c 509 (82 83) */ add %o7,%o2,%o2
+/* 0x0510 510 (83 84) */ add %o2,%o3,%o2
+/* 0x0514 511 (83 85) */ ldx [%sp+144],%o5
+/* 0x0518 512 (84 86) */ ldx [%sp+160],%g2
+/* 0x051c 513 (85 87) */ ldx [%sp+152],%o3
+/* 0x0520 514 (86 87) */ st %g3,[%i0+32]
+/* 0x0524 515 (86 87) */ add %g2,%o5,%g2
+/* 0x0528 516 (86 87) */ srax %o2,32,%o5
+/* 0x052c 517 (87 88) */ st %g5,[%i0+36]
+/* 0x0530 518 (87 88) */ add %g2,%o5,%g2
+/* 0x0534 519 (87 88) */ add %o3,%o4,%g3
+/* 0x0538 520 (88 89) */ st %o0,[%i0+44]
+/* 0x053c 521 (88 89) */ srax %g2,32,%g5
+/* 0x0540 522 (89 90) */ st %o1,[%i0+48]
+/* 0x0544 523 (89 90) */ add %g3,%g5,%g3
+/* 0x0548 524 (90 91) */ st %o2,[%i0+52]
+/* 0x054c 528 (90 91) */ srax %g3,32,%o7
+/* 0x0550 529 (91 92) */ st %g4,[%i0+40]
+/* 0x0554 530 (92 93) */ st %g2,[%i0+56]
+/* 0x0558 531 (93 94) */ st %g3,[%i0+60]
+/* 0x055c 532 (93 94) */ or %g0,%o7,%i0
+/* 0x0560 (94 101) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0564 (96 98) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000076
+!
+
+ .L77000076: /* frequency 1.0 confidence 0.0 */
+/* 0x0568 540 ( 0 4) */ ldd [%o0],%f6
+/* 0x056c 546 ( 0 1) */ add %o2,1,%g2
+/* 0x0570 547 ( 0 3) */ fmovd %f0,%f14
+/* 0x0574 548 ( 0 1) */ or %g0,0,%o7
+/* 0x0578 549 ( 1 3) */ ld [%fp+84],%f9
+/* 0x057c 550 ( 1 2) */ srl %g2,31,%g3
+/* 0x0580 551 ( 1 2) */ add %fp,-2264,%o5
+/* 0x0584 552 ( 2 3) */ add %g2,%g3,%g2
+/* 0x0588 553 ( 2 6) */ ldd [%o0+8],%f18
+/* 0x058c 554 ( 2 3) */ add %fp,-2256,%o4
+/* 0x0590 555 ( 3 6) */ fmovs %f6,%f8
+/* 0x0594 556 ( 3 4) */ sra %g2,1,%o1
+/* 0x0598 557 ( 3 4) */ or %g0,0,%g2
+/* 0x059c 558 ( 4 5) */ subcc %o1,0,%g0
+/* 0x05a0 559 ( 4 5) */ sub %o1,1,%o2
+/* 0x05a4 563 ( 5 6) */ add %g5,32,%o0
+/* 0x05a8 564 ( 6 11) */ fsubd %f8,%f6,%f16
+/* 0x05ac 565 ( 6 7) */ ble,pt %icc,.L900000161 ! tprob=0.50
+/* 0x05b0 ( 6 7) */ subcc %o3,0,%g0
+/* 0x05b4 567 ( 7 8) */ subcc %o1,7,%g0
+/* 0x05b8 568 ( 7 8) */ bl,pn %icc,.L77000077 ! tprob=0.50
+/* 0x05bc ( 7 8) */ sub %o1,2,%o1
+/* 0x05c0 570 ( 8 12) */ ldd [%g5],%f2
+/* 0x05c4 571 ( 9 13) */ ldd [%g5+8],%f4
+/* 0x05c8 572 ( 9 10) */ or %g0,5,%g2
+/* 0x05cc 573 (10 14) */ ldd [%g5+16],%f0
+/* 0x05d0 574 (11 15) */ fxnor %f14,%f2,%f2
+/* 0x05d4 575 (11 15) */ ldd [%g5+24],%f12
+/* 0x05d8 576 (12 16) */ fxnor %f14,%f4,%f6
+/* 0x05dc 577 (12 16) */ ldd [%g5+32],%f10
+/* 0x05e0 578 (13 17) */ fxnor %f14,%f0,%f8
+/* 0x05e4 579 (15 20) */ fitod %f3,%f0
+/* 0x05e8 580 (16 21) */ fitod %f2,%f4
+/* 0x05ec 581 (17 22) */ fitod %f7,%f2
+/* 0x05f0 582 (18 23) */ fitod %f6,%f6
+/* 0x05f4 583 (20 25) */ fsubd %f18,%f0,%f0
+/* 0x05f8 584 (21 26) */ fsubd %f18,%f4,%f4
+
+!
+! ENTRY .L900000149
+!
+
+ .L900000149: /* frequency 1.0 confidence 0.0 */
+/* 0x05fc 586 ( 0 4) */ fxnor %f14,%f12,%f22
+/* 0x0600 587 ( 0 5) */ fmuld %f4,%f16,%f4
+/* 0x0604 588 ( 0 1) */ add %g2,2,%g2
+/* 0x0608 589 ( 0 1) */ add %o4,32,%o4
+/* 0x060c 590 ( 1 6) */ fitod %f9,%f24
+/* 0x0610 591 ( 1 6) */ fmuld %f0,%f16,%f20
+/* 0x0614 592 ( 1 2) */ add %o0,8,%o0
+/* 0x0618 593 ( 1 2) */ subcc %g2,%o1,%g0
+/* 0x061c 594 ( 2 6) */ ldd [%o0],%f12
+/* 0x0620 595 ( 2 7) */ fsubd %f18,%f2,%f0
+/* 0x0624 596 ( 2 3) */ add %o5,32,%o5
+/* 0x0628 597 ( 3 8) */ fsubd %f18,%f6,%f2
+/* 0x062c 598 ( 5 10) */ fdtox %f4,%f4
+/* 0x0630 599 ( 6 11) */ fdtox %f20,%f6
+/* 0x0634 600 ( 6 7) */ std %f4,[%o5-32]
+/* 0x0638 601 ( 7 12) */ fitod %f8,%f4
+/* 0x063c 602 ( 7 8) */ std %f6,[%o4-32]
+/* 0x0640 603 ( 8 12) */ fxnor %f14,%f10,%f8
+/* 0x0644 604 ( 8 13) */ fmuld %f2,%f16,%f6
+/* 0x0648 605 ( 9 14) */ fitod %f23,%f2
+/* 0x064c 606 ( 9 14) */ fmuld %f0,%f16,%f20
+/* 0x0650 607 ( 9 10) */ add %o0,8,%o0
+/* 0x0654 608 (10 14) */ ldd [%o0],%f10
+/* 0x0658 609 (10 15) */ fsubd %f18,%f24,%f0
+/* 0x065c 610 (12 17) */ fsubd %f18,%f4,%f4
+/* 0x0660 611 (13 18) */ fdtox %f6,%f6
+/* 0x0664 612 (14 19) */ fdtox %f20,%f20
+/* 0x0668 613 (14 15) */ std %f6,[%o5-16]
+/* 0x066c 614 (15 20) */ fitod %f22,%f6
+/* 0x0670 615 (15 16) */ ble,pt %icc,.L900000149 ! tprob=0.50
+/* 0x0674 (15 16) */ std %f20,[%o4-16]
+
+!
+! ENTRY .L900000152
+!
+
+ .L900000152: /* frequency 1.0 confidence 0.0 */
+/* 0x0678 618 ( 0 4) */ fxnor %f14,%f12,%f12
+/* 0x067c 619 ( 0 5) */ fmuld %f0,%f16,%f22
+/* 0x0680 620 ( 0 1) */ add %o5,80,%o5
+/* 0x0684 621 ( 0 1) */ add %o4,80,%o4
+/* 0x0688 622 ( 1 5) */ fxnor %f14,%f10,%f0
+/* 0x068c 623 ( 1 6) */ fmuld %f4,%f16,%f24
+/* 0x0690 624 ( 1 2) */ subcc %g2,%o2,%g0
+/* 0x0694 625 ( 1 2) */ add %o0,8,%g5
+/* 0x0698 626 ( 2 7) */ fitod %f8,%f20
+/* 0x069c 627 ( 3 8) */ fitod %f9,%f8
+/* 0x06a0 628 ( 4 9) */ fsubd %f18,%f6,%f6
+/* 0x06a4 629 ( 5 10) */ fitod %f12,%f26
+/* 0x06a8 630 ( 6 11) */ fitod %f13,%f4
+/* 0x06ac 631 ( 7 12) */ fsubd %f18,%f2,%f12
+/* 0x06b0 632 ( 8 13) */ fitod %f0,%f2
+/* 0x06b4 633 ( 9 14) */ fitod %f1,%f0
+/* 0x06b8 634 (10 15) */ fsubd %f18,%f20,%f10
+/* 0x06bc 635 (10 15) */ fmuld %f6,%f16,%f20
+/* 0x06c0 636 (11 16) */ fsubd %f18,%f8,%f8
+/* 0x06c4 637 (12 17) */ fsubd %f18,%f26,%f6
+/* 0x06c8 638 (12 17) */ fmuld %f12,%f16,%f12
+/* 0x06cc 639 (13 18) */ fsubd %f18,%f4,%f4
+/* 0x06d0 640 (14 19) */ fsubd %f18,%f2,%f2
+/* 0x06d4 641 (15 20) */ fsubd %f18,%f0,%f0
+/* 0x06d8 642 (15 20) */ fmuld %f10,%f16,%f10
+/* 0x06dc 643 (16 21) */ fdtox %f24,%f24
+/* 0x06e0 644 (16 17) */ std %f24,[%o5-80]
+/* 0x06e4 645 (16 21) */ fmuld %f8,%f16,%f8
+/* 0x06e8 646 (17 22) */ fdtox %f22,%f22
+/* 0x06ec 647 (17 18) */ std %f22,[%o4-80]
+/* 0x06f0 648 (17 22) */ fmuld %f6,%f16,%f6
+/* 0x06f4 649 (18 23) */ fdtox %f20,%f20
+/* 0x06f8 650 (18 19) */ std %f20,[%o5-64]
+/* 0x06fc 651 (18 23) */ fmuld %f4,%f16,%f4
+/* 0x0700 652 (19 24) */ fdtox %f12,%f12
+/* 0x0704 653 (19 20) */ std %f12,[%o4-64]
+/* 0x0708 654 (19 24) */ fmuld %f2,%f16,%f2
+/* 0x070c 655 (20 25) */ fdtox %f10,%f10
+/* 0x0710 656 (20 21) */ std %f10,[%o5-48]
+/* 0x0714 657 (20 25) */ fmuld %f0,%f16,%f0
+/* 0x0718 658 (21 26) */ fdtox %f8,%f8
+/* 0x071c 659 (21 22) */ std %f8,[%o4-48]
+/* 0x0720 660 (22 27) */ fdtox %f6,%f6
+/* 0x0724 661 (22 23) */ std %f6,[%o5-32]
+/* 0x0728 662 (23 28) */ fdtox %f4,%f4
+/* 0x072c 663 (23 24) */ std %f4,[%o4-32]
+/* 0x0730 664 (24 29) */ fdtox %f2,%f2
+/* 0x0734 665 (24 25) */ std %f2,[%o5-16]
+/* 0x0738 666 (25 30) */ fdtox %f0,%f0
+/* 0x073c 667 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50
+/* 0x0740 (25 26) */ std %f0,[%o4-16]
+
+!
+! ENTRY .L77000077
+!
+
+ .L77000077: /* frequency 1.0 confidence 0.0 */
+/* 0x0744 670 ( 0 4) */ ldd [%g5],%f0
+
+!
+! ENTRY .L900000160
+!
+
+ .L900000160: /* frequency 1.0 confidence 0.0 */
+/* 0x0748 672 ( 0 4) */ fxnor %f14,%f0,%f0
+/* 0x074c 673 ( 0 1) */ add %g2,1,%g2
+/* 0x0750 674 ( 0 1) */ add %g5,8,%g5
+/* 0x0754 675 ( 1 2) */ subcc %g2,%o2,%g0
+/* 0x0758 676 ( 4 9) */ fitod %f0,%f2
+/* 0x075c 677 ( 5 10) */ fitod %f1,%f0
+/* 0x0760 678 ( 9 14) */ fsubd %f18,%f2,%f2
+/* 0x0764 679 (10 15) */ fsubd %f18,%f0,%f0
+/* 0x0768 680 (14 19) */ fmuld %f2,%f16,%f2
+/* 0x076c 681 (15 20) */ fmuld %f0,%f16,%f0
+/* 0x0770 682 (19 24) */ fdtox %f2,%f2
+/* 0x0774 683 (19 20) */ std %f2,[%o5]
+/* 0x0778 684 (19 20) */ add %o5,16,%o5
+/* 0x077c 685 (20 25) */ fdtox %f0,%f0
+/* 0x0780 686 (20 21) */ std %f0,[%o4]
+/* 0x0784 687 (20 21) */ add %o4,16,%o4
+/* 0x0788 688 (20 21) */ ble,a,pt %icc,.L900000160 ! tprob=0.50
+/* 0x078c (23 27) */ ldd [%g5],%f0
+
+!
+! ENTRY .L77000043
+!
+
+ .L77000043: /* frequency 1.0 confidence 0.0 */
+/* 0x0790 696 ( 0 1) */ subcc %o3,0,%g0
+
+!
+! ENTRY .L900000161
+!
+
+ .L900000161: /* frequency 1.0 confidence 0.0 */
+/* 0x0794 698 ( 0 1) */ ble,a,pt %icc,.L900000159 ! tprob=0.50
+/* 0x0798 ( 0 1) */ or %g0,%o7,%i0
+/* 0x079c 703 ( 0 2) */ ldx [%fp-2256],%o2
+/* 0x07a0 704 ( 0 1) */ or %g0,%i1,%g3
+/* 0x07a4 705 ( 1 2) */ sub %o3,1,%o5
+/* 0x07a8 706 ( 1 2) */ or %g0,0,%g4
+/* 0x07ac 707 ( 2 3) */ add %fp,-2264,%g5
+/* 0x07b0 708 ( 2 3) */ or %g0,%i0,%g2
+/* 0x07b4 709 ( 3 4) */ subcc %o3,6,%g0
+/* 0x07b8 710 ( 3 4) */ sub %o5,2,%o4
+/* 0x07bc 711 ( 3 4) */ bl,pn %icc,.L77000078 ! tprob=0.50
+/* 0x07c0 ( 3 5) */ ldx [%fp-2264],%o0
+/* 0x07c4 713 ( 4 6) */ ld [%g3],%o1
+/* 0x07c8 714 ( 4 5) */ add %g2,4,%g2
+/* 0x07cc 715 ( 4 5) */ or %g0,3,%g4
+/* 0x07d0 716 ( 5 7) */ ld [%g3+4],%o3
+/* 0x07d4 717 ( 5 6) */ add %g3,8,%g3
+/* 0x07d8 718 ( 5 6) */ add %fp,-2240,%g5
+/* 0x07dc 719 ( 6 7) */ add %o0,%o1,%o0
+/* 0x07e0 720 ( 6 8) */ ldx [%fp-2248],%o1
+/* 0x07e4 721 ( 7 8) */ st %o0,[%g2-4]
+/* 0x07e8 722 ( 7 8) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000145
+!
+
+ .L900000145: /* frequency 1.0 confidence 0.0 */
+/* 0x07ec 724 ( 0 2) */ ld [%g3],%o7
+/* 0x07f0 725 ( 0 1) */ add %o2,%o3,%o2
+/* 0x07f4 726 ( 0 1) */ sra %o0,0,%o3
+/* 0x07f8 727 ( 1 3) */ ldx [%g5],%o0
+/* 0x07fc 728 ( 1 2) */ add %o2,%o3,%o2
+/* 0x0800 729 ( 1 2) */ add %g4,3,%g4
+/* 0x0804 730 ( 2 3) */ st %o2,[%g2]
+/* 0x0808 731 ( 2 3) */ srax %o2,32,%o3
+/* 0x080c 732 ( 2 3) */ subcc %g4,%o4,%g0
+/* 0x0810 733 ( 3 5) */ ld [%g3+4],%o2
+/* 0x0814 734 ( 4 5) */ stx %o2,[%sp+96]
+/* 0x0818 735 ( 4 5) */ add %o1,%o7,%o1
+/* 0x081c 736 ( 5 7) */ ldx [%g5+8],%o2
+/* 0x0820 737 ( 5 6) */ add %o1,%o3,%o1
+/* 0x0824 738 ( 5 6) */ add %g2,12,%g2
+/* 0x0828 739 ( 6 7) */ st %o1,[%g2-8]
+/* 0x082c 740 ( 6 7) */ srax %o1,32,%o7
+/* 0x0830 741 ( 6 7) */ add %g3,12,%g3
+/* 0x0834 742 ( 7 9) */ ld [%g3-4],%o3
+/* 0x0838 743 ( 8 10) */ ldx [%sp+96],%o1
+/* 0x083c 744 (10 11) */ add %o0,%o1,%o0
+/* 0x0840 745 (10 12) */ ldx [%g5+16],%o1
+/* 0x0844 746 (11 12) */ add %o0,%o7,%o0
+/* 0x0848 747 (11 12) */ add %g5,24,%g5
+/* 0x084c 748 (11 12) */ st %o0,[%g2-4]
+/* 0x0850 749 (11 12) */ ble,pt %icc,.L900000145 ! tprob=0.50
+/* 0x0854 (12 13) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000148
+!
+
+ .L900000148: /* frequency 1.0 confidence 0.0 */
+/* 0x0858 752 ( 0 1) */ add %o2,%o3,%o2
+/* 0x085c 753 ( 0 1) */ sra %o0,0,%o3
+/* 0x0860 754 ( 0 2) */ ld [%g3],%o0
+/* 0x0864 755 ( 1 2) */ add %o2,%o3,%o3
+/* 0x0868 756 ( 1 2) */ add %g2,8,%g2
+/* 0x086c 757 ( 2 3) */ srax %o3,32,%o2
+/* 0x0870 758 ( 2 3) */ st %o3,[%g2-8]
+/* 0x0874 759 ( 2 3) */ add %o1,%o0,%o0
+/* 0x0878 760 ( 3 4) */ add %o0,%o2,%o0
+/* 0x087c 761 ( 3 4) */ st %o0,[%g2-4]
+/* 0x0880 762 ( 3 4) */ subcc %g4,%o5,%g0
+/* 0x0884 763 ( 3 4) */ bg,pn %icc,.L77000061 ! tprob=0.50
+/* 0x0888 ( 4 5) */ srax %o0,32,%o7
+/* 0x088c 765 ( 4 5) */ add %g3,4,%g3
+
+!
+! ENTRY .L77000078
+!
+
+ .L77000078: /* frequency 1.0 confidence 0.0 */
+/* 0x0890 767 ( 0 2) */ ld [%g3],%o2
+
+!
+! ENTRY .L900000158
+!
+
+ .L900000158: /* frequency 1.0 confidence 0.0 */
+/* 0x0894 769 ( 0 2) */ ldx [%g5],%o0
+/* 0x0898 770 ( 0 1) */ sra %o7,0,%o1
+/* 0x089c 771 ( 0 1) */ add %g4,1,%g4
+/* 0x08a0 772 ( 1 2) */ add %g3,4,%g3
+/* 0x08a4 773 ( 1 2) */ add %g5,8,%g5
+/* 0x08a8 774 ( 2 3) */ add %o0,%o2,%o0
+/* 0x08ac 775 ( 2 3) */ subcc %g4,%o5,%g0
+/* 0x08b0 776 ( 3 4) */ add %o0,%o1,%o0
+/* 0x08b4 777 ( 3 4) */ st %o0,[%g2]
+/* 0x08b8 778 ( 3 4) */ add %g2,4,%g2
+/* 0x08bc 779 ( 4 5) */ srax %o0,32,%o7
+/* 0x08c0 780 ( 4 5) */ ble,a,pt %icc,.L900000158 ! tprob=0.50
+/* 0x08c4 ( 4 6) */ ld [%g3],%o2
+
+!
+! ENTRY .L77000047
+!
+
+ .L77000047: /* frequency 1.0 confidence 0.0 */
+/* 0x08c8 783 ( 0 1) */ or %g0,%o7,%i0
+/* 0x08cc ( 1 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x08d0 ( 3 5) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000048
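+! (large-multiplier entry, apparently taken when the multiplier has more
+! than 19 significant bits, as in the v9 variant later in this patch: the
+! srl/andn pair below splits it into 13-bit high and 19-bit low halves so
+! every partial product stays exactly representable in a 52-bit double
+! mantissa; the bne dispatches lengths other than 8 to .L77000050)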
+!
+
+ .L77000048: /* frequency 1.0 confidence 0.0 */
+/* 0x08d4 794 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50
+/* 0x08d8 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x08dc 796 ( 0 4) */ ldd [%g5],%f4
+/* 0x08e0 804 ( 0 1) */ srl %o1,19,%g3
+/* 0x08e4 805 ( 1 2) */ st %g3,[%sp+240]
+/* 0x08e8 806 ( 1 2) */ andn %o1,%g2,%g2
+/* 0x08ec 807 ( 2 6) */ ldd [%o0],%f8
+/* 0x08f0 808 ( 3 4) */ st %g2,[%sp+244]
+/* 0x08f4 809 ( 3 7) */ fxnor %f0,%f4,%f4
+/* 0x08f8 810 ( 4 8) */ ldd [%g5+8],%f6
+/* 0x08fc 814 ( 5 9) */ ldd [%o0+8],%f18
+/* 0x0900 815 ( 5 8) */ fmovs %f8,%f12
+/* 0x0904 816 ( 6 10) */ ldd [%g5+16],%f10
+/* 0x0908 817 ( 6 9) */ fmovs %f8,%f16
+/* 0x090c 818 ( 7 11) */ ldd [%g5+24],%f20
+/* 0x0910 819 ( 7 12) */ fitod %f4,%f14
+/* 0x0914 823 ( 8 10) */ ld [%i1],%g2
+/* 0x0918 824 ( 8 13) */ fitod %f5,%f4
+/* 0x091c 825 ( 9 11) */ ld [%sp+240],%f13
+/* 0x0920 826 ( 9 13) */ fxnor %f0,%f6,%f6
+/* 0x0924 827 (10 12) */ ld [%sp+244],%f17
+/* 0x0928 828 (10 14) */ fxnor %f0,%f10,%f10
+/* 0x092c 829 (11 13) */ ld [%i1+28],%o3
+/* 0x0930 830 (11 15) */ fxnor %f0,%f20,%f20
+/* 0x0934 831 (12 14) */ ld [%i1+4],%g3
+/* 0x0938 832 (12 17) */ fsubd %f12,%f8,%f12
+/* 0x093c 833 (13 14) */ stx %o3,[%sp+96]
+/* 0x0940 834 (13 18) */ fsubd %f18,%f14,%f14
+/* 0x0944 835 (14 16) */ ld [%i1+8],%g4
+/* 0x0948 836 (14 19) */ fsubd %f16,%f8,%f8
+/* 0x094c 837 (15 17) */ ld [%i1+12],%g5
+/* 0x0950 838 (15 20) */ fsubd %f18,%f4,%f4
+/* 0x0954 839 (16 18) */ ld [%i1+16],%o0
+/* 0x0958 840 (16 21) */ fitod %f6,%f22
+/* 0x095c 841 (17 19) */ ld [%i1+20],%o1
+/* 0x0960 842 (17 22) */ fitod %f7,%f6
+/* 0x0964 843 (18 20) */ ld [%i1+24],%o2
+/* 0x0968 844 (18 23) */ fitod %f10,%f16
+/* 0x096c 845 (18 23) */ fmuld %f14,%f12,%f24
+/* 0x0970 846 (19 24) */ fitod %f20,%f28
+/* 0x0974 847 (19 24) */ fmuld %f14,%f8,%f14
+/* 0x0978 848 (20 25) */ fitod %f11,%f10
+/* 0x097c 849 (20 25) */ fmuld %f4,%f12,%f26
+/* 0x0980 850 (21 26) */ fsubd %f18,%f22,%f22
+/* 0x0984 851 (21 26) */ fmuld %f4,%f8,%f4
+/* 0x0988 852 (22 27) */ fsubd %f18,%f6,%f6
+/* 0x098c 853 (23 28) */ fdtox %f24,%f24
+/* 0x0990 854 (23 24) */ std %f24,[%sp+224]
+/* 0x0994 855 (24 29) */ fdtox %f14,%f14
+/* 0x0998 856 (24 25) */ std %f14,[%sp+232]
+/* 0x099c 857 (25 30) */ fdtox %f26,%f14
+/* 0x09a0 858 (25 26) */ std %f14,[%sp+208]
+/* 0x09a4 859 (26 28) */ ldx [%sp+224],%o4
+/* 0x09a8 860 (26 31) */ fitod %f21,%f20
+/* 0x09ac 861 (26 31) */ fmuld %f22,%f12,%f30
+/* 0x09b0 862 (27 29) */ ldx [%sp+232],%o5
+/* 0x09b4 863 (27 32) */ fsubd %f18,%f16,%f16
+/* 0x09b8 864 (27 32) */ fmuld %f22,%f8,%f22
+/* 0x09bc 865 (28 29) */ sllx %o4,19,%o4
+/* 0x09c0 866 (28 33) */ fdtox %f4,%f4
+/* 0x09c4 867 (28 29) */ std %f4,[%sp+216]
+/* 0x09c8 868 (28 33) */ fmuld %f6,%f12,%f24
+/* 0x09cc 869 (29 34) */ fsubd %f18,%f28,%f26
+/* 0x09d0 870 (29 30) */ add %o5,%o4,%o4
+/* 0x09d4 871 (29 34) */ fmuld %f6,%f8,%f6
+/* 0x09d8 872 (30 35) */ fsubd %f18,%f10,%f10
+/* 0x09dc 873 (30 31) */ add %o4,%g2,%g2
+/* 0x09e0 874 (30 31) */ st %g2,[%i0]
+/* 0x09e4 875 (31 33) */ ldx [%sp+208],%o7
+/* 0x09e8 876 (31 32) */ srlx %g2,32,%o5
+/* 0x09ec 877 (31 36) */ fsubd %f18,%f20,%f18
+/* 0x09f0 878 (32 37) */ fdtox %f30,%f28
+/* 0x09f4 879 (32 33) */ std %f28,[%sp+192]
+/* 0x09f8 880 (32 37) */ fmuld %f16,%f12,%f14
+/* 0x09fc 881 (33 34) */ sllx %o7,19,%o4
+/* 0x0a00 882 (33 35) */ ldx [%sp+216],%o7
+/* 0x0a04 883 (33 38) */ fdtox %f22,%f20
+/* 0x0a08 884 (33 38) */ fmuld %f16,%f8,%f16
+/* 0x0a0c 885 (34 35) */ std %f20,[%sp+200]
+/* 0x0a10 886 (34 39) */ fdtox %f24,%f20
+/* 0x0a14 887 (34 39) */ fmuld %f26,%f12,%f22
+/* 0x0a18 888 (35 36) */ std %f20,[%sp+176]
+/* 0x0a1c 889 (35 36) */ add %o7,%o4,%o4
+/* 0x0a20 890 (35 40) */ fdtox %f6,%f6
+/* 0x0a24 891 (35 40) */ fmuld %f10,%f12,%f4
+/* 0x0a28 892 (36 38) */ ldx [%sp+192],%o3
+/* 0x0a2c 893 (36 37) */ add %o4,%g3,%g3
+/* 0x0a30 894 (36 41) */ fmuld %f10,%f8,%f10
+/* 0x0a34 895 (37 38) */ std %f6,[%sp+184]
+/* 0x0a38 896 (37 38) */ add %g3,%o5,%g3
+/* 0x0a3c 897 (37 42) */ fdtox %f14,%f6
+/* 0x0a40 898 (37 42) */ fmuld %f26,%f8,%f20
+/* 0x0a44 899 (38 40) */ ldx [%sp+200],%o4
+/* 0x0a48 900 (38 39) */ sllx %o3,19,%o3
+/* 0x0a4c 901 (38 39) */ srlx %g3,32,%o5
+/* 0x0a50 902 (38 43) */ fdtox %f16,%f14
+/* 0x0a54 903 (39 40) */ std %f6,[%sp+160]
+/* 0x0a58 904 (39 44) */ fmuld %f18,%f12,%f12
+/* 0x0a5c 905 (40 42) */ ldx [%sp+176],%o7
+/* 0x0a60 906 (40 41) */ add %o4,%o3,%o3
+/* 0x0a64 907 (40 45) */ fdtox %f4,%f16
+/* 0x0a68 908 (40 45) */ fmuld %f18,%f8,%f18
+/* 0x0a6c 909 (41 42) */ std %f14,[%sp+168]
+/* 0x0a70 910 (41 42) */ add %o3,%g4,%g4
+/* 0x0a74 911 (41 46) */ fdtox %f10,%f4
+/* 0x0a78 912 (42 44) */ ldx [%sp+184],%o3
+/* 0x0a7c 913 (42 43) */ sllx %o7,19,%o4
+/* 0x0a80 914 (42 43) */ add %g4,%o5,%g4
+/* 0x0a84 915 (42 47) */ fdtox %f22,%f14
+/* 0x0a88 916 (43 44) */ std %f16,[%sp+144]
+/* 0x0a8c 917 (43 44) */ srlx %g4,32,%o5
+/* 0x0a90 918 (43 48) */ fdtox %f20,%f6
+/* 0x0a94 919 (44 46) */ ldx [%sp+160],%o7
+/* 0x0a98 920 (44 45) */ add %o3,%o4,%o3
+/* 0x0a9c 921 (44 49) */ fdtox %f12,%f16
+/* 0x0aa0 922 (45 46) */ std %f4,[%sp+152]
+/* 0x0aa4 923 (45 46) */ add %o3,%g5,%g5
+/* 0x0aa8 924 (45 50) */ fdtox %f18,%f8
+/* 0x0aac 925 (46 48) */ ldx [%sp+168],%o3
+/* 0x0ab0 926 (46 47) */ sllx %o7,19,%o4
+/* 0x0ab4 927 (46 47) */ add %g5,%o5,%g5
+/* 0x0ab8 928 (47 48) */ std %f14,[%sp+128]
+/* 0x0abc 929 (47 48) */ srlx %g5,32,%o5
+/* 0x0ac0 930 (48 49) */ std %f6,[%sp+136]
+/* 0x0ac4 931 (48 49) */ add %o3,%o4,%o3
+/* 0x0ac8 932 (49 50) */ std %f16,[%sp+112]
+/* 0x0acc 933 (49 50) */ add %o3,%o0,%o0
+/* 0x0ad0 934 (50 52) */ ldx [%sp+144],%o7
+/* 0x0ad4 935 (50 51) */ add %o0,%o5,%o0
+/* 0x0ad8 936 (51 53) */ ldx [%sp+152],%o3
+/* 0x0adc 937 (52 53) */ std %f8,[%sp+120]
+/* 0x0ae0 938 (52 53) */ sllx %o7,19,%o4
+/* 0x0ae4 939 (52 53) */ srlx %o0,32,%o7
+/* 0x0ae8 940 (53 54) */ stx %o0,[%sp+104]
+/* 0x0aec 941 (53 54) */ add %o3,%o4,%o3
+/* 0x0af0 942 (54 56) */ ldx [%sp+128],%o5
+/* 0x0af4 943 (54 55) */ add %o3,%o1,%o1
+/* 0x0af8 944 (55 57) */ ldx [%sp+136],%o0
+/* 0x0afc 945 (55 56) */ add %o1,%o7,%o1
+/* 0x0b00 946 (56 57) */ st %g3,[%i0+4]
+/* 0x0b04 947 (56 57) */ sllx %o5,19,%o3
+/* 0x0b08 948 (57 59) */ ldx [%sp+112],%o4
+/* 0x0b0c 949 (57 58) */ add %o0,%o3,%o3
+/* 0x0b10 950 (58 60) */ ldx [%sp+120],%o0
+/* 0x0b14 951 (58 59) */ add %o3,%o2,%o2
+/* 0x0b18 952 (58 59) */ srlx %o1,32,%o3
+/* 0x0b1c 953 (59 60) */ st %o1,[%i0+20]
+/* 0x0b20 954 (59 60) */ sllx %o4,19,%g2
+/* 0x0b24 955 (59 60) */ add %o2,%o3,%o2
+/* 0x0b28 956 (60 62) */ ldx [%sp+96],%o4
+/* 0x0b2c 957 (60 61) */ srlx %o2,32,%g3
+/* 0x0b30 958 (60 61) */ add %o0,%g2,%g2
+/* 0x0b34 959 (61 63) */ ldx [%sp+104],%o0
+/* 0x0b38 960 (62 63) */ st %o2,[%i0+24]
+/* 0x0b3c 961 (62 63) */ add %g2,%o4,%g2
+/* 0x0b40 962 (63 64) */ st %o0,[%i0+16]
+/* 0x0b44 963 (63 64) */ add %g2,%g3,%g2
+/* 0x0b48 964 (64 65) */ st %g4,[%i0+8]
+/* 0x0b4c 968 (64 65) */ srlx %g2,32,%o7
+/* 0x0b50 969 (65 66) */ st %g5,[%i0+12]
+/* 0x0b54 970 (66 67) */ st %g2,[%i0+28]
+/* 0x0b58 971 (66 67) */ or %g0,%o7,%i0
+/* 0x0b5c (67 74) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0b60 (69 71) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000050
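+! (length is not 8: the subcc/bne below handles exactly 16 words inline
+! and sends every other length on to the general code at .L77000073)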
+!
+
+ .L77000050: /* frequency 1.0 confidence 0.0 */
+/* 0x0b64 978 ( 0 1) */ subcc %o2,16,%g0
+/* 0x0b68 979 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50
+/* 0x0b6c ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0b70 981 ( 1 5) */ ldd [%g5],%f4
+/* 0x0b74 982 ( 2 6) */ ldd [%g5+8],%f6
+/* 0x0b78 989 ( 2 3) */ andn %o1,%g2,%g2
+/* 0x0b7c 993 ( 2 3) */ srl %o1,19,%g3
+/* 0x0b80 994 ( 3 7) */ ldd [%g5+16],%f8
+/* 0x0b84 995 ( 4 8) */ fxnor %f0,%f4,%f4
+/* 0x0b88 996 ( 4 5) */ st %g2,[%sp+356]
+/* 0x0b8c 997 ( 5 9) */ ldd [%o0],%f20
+/* 0x0b90 998 ( 5 9) */ fxnor %f0,%f6,%f6
+/* 0x0b94 999 ( 6 7) */ st %g3,[%sp+352]
+/* 0x0b98 1000 ( 6 10) */ fxnor %f0,%f8,%f8
+/* 0x0b9c 1005 ( 7 11) */ ldd [%o0+8],%f30
+/* 0x0ba0 1006 ( 8 13) */ fitod %f4,%f22
+/* 0x0ba4 1007 ( 8 12) */ ldd [%g5+24],%f10
+/* 0x0ba8 1008 ( 9 12) */ fmovs %f20,%f24
+/* 0x0bac 1009 ( 9 13) */ ldd [%g5+32],%f12
+/* 0x0bb0 1010 (10 15) */ fitod %f5,%f4
+/* 0x0bb4 1011 (10 14) */ ldd [%g5+40],%f14
+/* 0x0bb8 1012 (11 14) */ fmovs %f20,%f26
+/* 0x0bbc 1013 (11 15) */ ldd [%g5+48],%f16
+/* 0x0bc0 1014 (12 14) */ ld [%sp+356],%f25
+/* 0x0bc4 1015 (12 17) */ fitod %f6,%f28
+/* 0x0bc8 1016 (13 15) */ ld [%sp+352],%f27
+/* 0x0bcc 1017 (13 18) */ fitod %f8,%f32
+/* 0x0bd0 1018 (14 19) */ fsubd %f30,%f22,%f22
+/* 0x0bd4 1019 (14 18) */ ldd [%g5+56],%f18
+/* 0x0bd8 1020 (15 20) */ fsubd %f24,%f20,%f24
+/* 0x0bdc 1021 (16 21) */ fsubd %f26,%f20,%f20
+/* 0x0be0 1022 (17 22) */ fsubd %f30,%f4,%f4
+/* 0x0be4 1023 (18 23) */ fsubd %f30,%f28,%f26
+/* 0x0be8 1024 (19 24) */ fitod %f7,%f6
+/* 0x0bec 1025 (20 25) */ fsubd %f30,%f32,%f28
+/* 0x0bf0 1026 (20 25) */ fmuld %f22,%f24,%f32
+/* 0x0bf4 1027 (21 26) */ fmuld %f22,%f20,%f22
+/* 0x0bf8 1028 (21 25) */ fxnor %f0,%f10,%f10
+/* 0x0bfc 1029 (22 27) */ fmuld %f4,%f24,%f44
+/* 0x0c00 1030 (22 27) */ fitod %f9,%f8
+/* 0x0c04 1031 (23 28) */ fmuld %f4,%f20,%f4
+/* 0x0c08 1032 (23 27) */ fxnor %f0,%f12,%f12
+/* 0x0c0c 1033 (24 29) */ fsubd %f30,%f6,%f6
+/* 0x0c10 1034 (24 29) */ fmuld %f26,%f24,%f46
+/* 0x0c14 1035 (25 30) */ fitod %f10,%f34
+/* 0x0c18 1036 (26 31) */ fdtox %f22,%f22
+/* 0x0c1c 1037 (26 27) */ std %f22,[%sp+336]
+/* 0x0c20 1038 (27 32) */ fmuld %f26,%f20,%f22
+/* 0x0c24 1039 (27 32) */ fdtox %f44,%f26
+/* 0x0c28 1040 (27 28) */ std %f26,[%sp+328]
+/* 0x0c2c 1041 (28 33) */ fdtox %f4,%f4
+/* 0x0c30 1042 (28 29) */ std %f4,[%sp+320]
+/* 0x0c34 1043 (29 34) */ fmuld %f6,%f24,%f26
+/* 0x0c38 1044 (29 34) */ fsubd %f30,%f8,%f8
+/* 0x0c3c 1045 (30 35) */ fdtox %f46,%f4
+/* 0x0c40 1046 (30 31) */ std %f4,[%sp+312]
+/* 0x0c44 1047 (31 36) */ fmuld %f28,%f24,%f4
+/* 0x0c48 1048 (31 36) */ fdtox %f32,%f32
+/* 0x0c4c 1049 (31 32) */ std %f32,[%sp+344]
+/* 0x0c50 1050 (32 37) */ fitod %f11,%f10
+/* 0x0c54 1051 (32 37) */ fmuld %f6,%f20,%f32
+/* 0x0c58 1052 (33 38) */ fsubd %f30,%f34,%f34
+/* 0x0c5c 1053 (34 39) */ fdtox %f22,%f6
+/* 0x0c60 1054 (34 35) */ std %f6,[%sp+304]
+/* 0x0c64 1058 (35 40) */ fitod %f12,%f36
+/* 0x0c68 1059 (35 40) */ fmuld %f28,%f20,%f6
+/* 0x0c6c 1060 (36 41) */ fdtox %f26,%f22
+/* 0x0c70 1061 (36 37) */ std %f22,[%sp+296]
+/* 0x0c74 1062 (37 42) */ fmuld %f8,%f24,%f22
+/* 0x0c78 1063 (37 42) */ fdtox %f4,%f4
+/* 0x0c7c 1064 (37 38) */ std %f4,[%sp+280]
+/* 0x0c80 1065 (38 43) */ fmuld %f8,%f20,%f8
+/* 0x0c84 1066 (38 43) */ fsubd %f30,%f10,%f10
+/* 0x0c88 1067 (39 44) */ fmuld %f34,%f24,%f4
+/* 0x0c8c 1068 (39 44) */ fitod %f13,%f12
+/* 0x0c90 1069 (40 45) */ fsubd %f30,%f36,%f36
+/* 0x0c94 1070 (41 46) */ fdtox %f6,%f6
+/* 0x0c98 1071 (41 42) */ std %f6,[%sp+272]
+/* 0x0c9c 1072 (42 46) */ fxnor %f0,%f14,%f14
+/* 0x0ca0 1073 (42 47) */ fmuld %f34,%f20,%f6
+/* 0x0ca4 1074 (43 48) */ fdtox %f22,%f22
+/* 0x0ca8 1075 (43 44) */ std %f22,[%sp+264]
+/* 0x0cac 1076 (44 49) */ fdtox %f8,%f8
+/* 0x0cb0 1077 (44 45) */ std %f8,[%sp+256]
+/* 0x0cb4 1078 (44 49) */ fmuld %f10,%f24,%f22
+/* 0x0cb8 1079 (45 50) */ fdtox %f4,%f4
+/* 0x0cbc 1080 (45 46) */ std %f4,[%sp+248]
+/* 0x0cc0 1081 (45 50) */ fmuld %f10,%f20,%f8
+/* 0x0cc4 1082 (46 51) */ fsubd %f30,%f12,%f4
+/* 0x0cc8 1083 (46 51) */ fmuld %f36,%f24,%f10
+/* 0x0ccc 1084 (47 52) */ fitod %f14,%f38
+/* 0x0cd0 1085 (48 53) */ fdtox %f6,%f6
+/* 0x0cd4 1086 (48 49) */ std %f6,[%sp+240]
+/* 0x0cd8 1087 (49 54) */ fdtox %f22,%f12
+/* 0x0cdc 1088 (49 50) */ std %f12,[%sp+232]
+/* 0x0ce0 1089 (49 54) */ fmuld %f36,%f20,%f6
+/* 0x0ce4 1090 (50 55) */ fdtox %f8,%f8
+/* 0x0ce8 1091 (50 51) */ std %f8,[%sp+224]
+/* 0x0cec 1092 (51 56) */ fdtox %f10,%f22
+/* 0x0cf0 1093 (51 52) */ std %f22,[%sp+216]
+/* 0x0cf4 1094 (51 56) */ fmuld %f4,%f24,%f8
+/* 0x0cf8 1095 (52 57) */ fitod %f15,%f14
+/* 0x0cfc 1096 (52 57) */ fmuld %f4,%f20,%f4
+/* 0x0d00 1097 (53 58) */ fsubd %f30,%f38,%f22
+/* 0x0d04 1098 (54 58) */ fxnor %f0,%f16,%f16
+/* 0x0d08 1099 (55 60) */ fdtox %f6,%f6
+/* 0x0d0c 1100 (55 56) */ std %f6,[%sp+208]
+/* 0x0d10 1101 (56 61) */ fdtox %f8,%f6
+/* 0x0d14 1102 (56 57) */ std %f6,[%sp+200]
+/* 0x0d18 1103 (57 62) */ fsubd %f30,%f14,%f10
+/* 0x0d1c 1104 (58 63) */ fitod %f16,%f40
+/* 0x0d20 1105 (58 63) */ fmuld %f22,%f24,%f6
+/* 0x0d24 1106 (59 64) */ fdtox %f4,%f4
+/* 0x0d28 1107 (59 60) */ std %f4,[%sp+192]
+/* 0x0d2c 1108 (60 65) */ fitod %f17,%f16
+/* 0x0d30 1109 (60 65) */ fmuld %f22,%f20,%f4
+/* 0x0d34 1110 (61 65) */ fxnor %f0,%f18,%f18
+/* 0x0d38 1111 (62 67) */ fdtox %f32,%f32
+/* 0x0d3c 1112 (62 63) */ std %f32,[%sp+288]
+/* 0x0d40 1113 (62 67) */ fmuld %f10,%f24,%f8
+/* 0x0d44 1114 (63 68) */ fdtox %f6,%f6
+/* 0x0d48 1115 (63 64) */ std %f6,[%sp+184]
+/* 0x0d4c 1116 (63 68) */ fmuld %f10,%f20,%f22
+/* 0x0d50 1117 (64 69) */ fsubd %f30,%f40,%f6
+/* 0x0d54 1118 (65 70) */ fdtox %f4,%f4
+/* 0x0d58 1119 (65 66) */ std %f4,[%sp+176]
+/* 0x0d5c 1120 (66 71) */ fsubd %f30,%f16,%f10
+/* 0x0d60 1121 (67 72) */ fdtox %f8,%f4
+/* 0x0d64 1122 (67 68) */ std %f4,[%sp+168]
+/* 0x0d68 1123 (68 73) */ fdtox %f22,%f4
+/* 0x0d6c 1124 (68 69) */ std %f4,[%sp+160]
+/* 0x0d70 1125 (69 74) */ fitod %f18,%f42
+/* 0x0d74 1126 (69 74) */ fmuld %f6,%f24,%f4
+/* 0x0d78 1127 (70 75) */ fmuld %f6,%f20,%f22
+/* 0x0d7c 1128 (71 76) */ fmuld %f10,%f24,%f6
+/* 0x0d80 1129 (72 77) */ fmuld %f10,%f20,%f8
+/* 0x0d84 1130 (74 79) */ fdtox %f4,%f4
+/* 0x0d88 1131 (74 75) */ std %f4,[%sp+152]
+/* 0x0d8c 1132 (75 80) */ fsubd %f30,%f42,%f4
+/* 0x0d90 1133 (76 81) */ fdtox %f6,%f6
+/* 0x0d94 1134 (76 77) */ std %f6,[%sp+136]
+/* 0x0d98 1135 (77 82) */ fdtox %f22,%f22
+/* 0x0d9c 1136 (77 78) */ std %f22,[%sp+144]
+/* 0x0da0 1137 (78 83) */ fdtox %f8,%f22
+/* 0x0da4 1138 (78 79) */ std %f22,[%sp+128]
+/* 0x0da8 1139 (79 84) */ fitod %f19,%f22
+/* 0x0dac 1140 (80 85) */ fmuld %f4,%f24,%f6
+/* 0x0db0 1141 (81 86) */ fmuld %f4,%f20,%f4
+/* 0x0db4 1142 (84 89) */ fsubd %f30,%f22,%f22
+/* 0x0db8 1143 (85 90) */ fdtox %f6,%f6
+/* 0x0dbc 1144 (85 86) */ std %f6,[%sp+120]
+/* 0x0dc0 1145 (86 91) */ fdtox %f4,%f4
+/* 0x0dc4 1146 (86 87) */ std %f4,[%sp+112]
+/* 0x0dc8 1150 (87 89) */ ldx [%sp+336],%g2
+/* 0x0dcc 1151 (88 90) */ ldx [%sp+344],%g3
+/* 0x0dd0 1152 (89 91) */ ld [%i1],%g4
+/* 0x0dd4 1153 (89 90) */ sllx %g2,19,%g2
+/* 0x0dd8 1154 (89 94) */ fmuld %f22,%f20,%f4
+/* 0x0ddc 1155 (90 92) */ ldx [%sp+328],%g5
+/* 0x0de0 1156 (90 91) */ add %g3,%g2,%g2
+/* 0x0de4 1157 (90 95) */ fmuld %f22,%f24,%f6
+/* 0x0de8 1158 (91 93) */ ldx [%sp+320],%g3
+/* 0x0dec 1159 (91 92) */ add %g2,%g4,%g4
+/* 0x0df0 1160 (92 94) */ ldx [%sp+304],%o0
+/* 0x0df4 1161 (93 94) */ st %g4,[%i0]
+/* 0x0df8 1162 (93 94) */ sllx %g3,19,%g2
+/* 0x0dfc 1163 (93 94) */ srlx %g4,32,%g4
+/* 0x0e00 1164 (94 96) */ ld [%i1+4],%g3
+/* 0x0e04 1165 (94 95) */ add %g5,%g2,%g2
+/* 0x0e08 1166 (94 99) */ fdtox %f4,%f4
+/* 0x0e0c 1167 (95 97) */ ldx [%sp+312],%g5
+/* 0x0e10 1168 (95 100) */ fdtox %f6,%f6
+/* 0x0e14 1169 (96 98) */ ldx [%sp+288],%o1
+/* 0x0e18 1170 (96 97) */ add %g2,%g3,%g2
+/* 0x0e1c 1171 (96 97) */ sllx %o0,19,%g3
+/* 0x0e20 1172 (97 99) */ ldx [%sp+272],%o2
+/* 0x0e24 1173 (97 98) */ add %g2,%g4,%g2
+/* 0x0e28 1174 (97 98) */ add %g5,%g3,%g3
+/* 0x0e2c 1175 (98 100) */ ld [%i1+8],%g4
+/* 0x0e30 1176 (98 99) */ srlx %g2,32,%o0
+/* 0x0e34 1177 (99 101) */ ldx [%sp+296],%g5
+/* 0x0e38 1178 (100 101) */ st %g2,[%i0+4]
+/* 0x0e3c 1179 (100 101) */ sllx %o2,19,%g2
+/* 0x0e40 1180 (100 101) */ add %g3,%g4,%g3
+/* 0x0e44 1181 (101 103) */ ldx [%sp+256],%o2
+/* 0x0e48 1182 (101 102) */ sllx %o1,19,%g4
+/* 0x0e4c 1183 (101 102) */ add %g3,%o0,%g3
+/* 0x0e50 1184 (102 104) */ ld [%i1+12],%o0
+/* 0x0e54 1185 (102 103) */ srlx %g3,32,%o1
+/* 0x0e58 1186 (102 103) */ add %g5,%g4,%g4
+/* 0x0e5c 1187 (103 105) */ ldx [%sp+280],%g5
+/* 0x0e60 1188 (104 105) */ st %g3,[%i0+8]
+/* 0x0e64 1189 (104 105) */ sllx %o2,19,%g3
+/* 0x0e68 1190 (104 105) */ add %g4,%o0,%g4
+/* 0x0e6c 1191 (105 107) */ ld [%i1+16],%o0
+/* 0x0e70 1192 (105 106) */ add %g5,%g2,%g2
+/* 0x0e74 1193 (105 106) */ add %g4,%o1,%g4
+/* 0x0e78 1194 (106 108) */ ldx [%sp+264],%g5
+/* 0x0e7c 1195 (106 107) */ srlx %g4,32,%o1
+/* 0x0e80 1196 (107 109) */ ldx [%sp+240],%o2
+/* 0x0e84 1197 (107 108) */ add %g2,%o0,%g2
+/* 0x0e88 1198 (108 110) */ ld [%i1+20],%o0
+/* 0x0e8c 1199 (108 109) */ add %g5,%g3,%g3
+/* 0x0e90 1200 (108 109) */ add %g2,%o1,%g2
+/* 0x0e94 1201 (109 111) */ ldx [%sp+248],%g5
+/* 0x0e98 1202 (109 110) */ srlx %g2,32,%o1
+/* 0x0e9c 1203 (110 111) */ st %g4,[%i0+12]
+/* 0x0ea0 1204 (110 111) */ sllx %o2,19,%g4
+/* 0x0ea4 1205 (110 111) */ add %g3,%o0,%g3
+/* 0x0ea8 1206 (111 113) */ ld [%i1+24],%o0
+/* 0x0eac 1207 (111 112) */ add %g5,%g4,%g4
+/* 0x0eb0 1208 (111 112) */ add %g3,%o1,%g3
+/* 0x0eb4 1209 (112 114) */ ldx [%sp+224],%o2
+/* 0x0eb8 1210 (112 113) */ srlx %g3,32,%o1
+/* 0x0ebc 1211 (113 115) */ ldx [%sp+232],%g5
+/* 0x0ec0 1212 (113 114) */ add %g4,%o0,%g4
+/* 0x0ec4 1213 (114 115) */ st %g2,[%i0+16]
+/* 0x0ec8 1214 (114 115) */ sllx %o2,19,%g2
+/* 0x0ecc 1215 (114 115) */ add %g4,%o1,%g4
+/* 0x0ed0 1216 (115 117) */ ld [%i1+28],%o0
+/* 0x0ed4 1217 (115 116) */ srlx %g4,32,%o1
+/* 0x0ed8 1218 (115 116) */ add %g5,%g2,%g2
+/* 0x0edc 1222 (116 118) */ ldx [%sp+208],%o2
+/* 0x0ee0 1223 (117 119) */ ldx [%sp+216],%g5
+/* 0x0ee4 1224 (117 118) */ add %g2,%o0,%g2
+/* 0x0ee8 1225 (118 119) */ st %g3,[%i0+20]
+/* 0x0eec 1226 (118 119) */ sllx %o2,19,%g3
+/* 0x0ef0 1227 (118 119) */ add %g2,%o1,%g2
+/* 0x0ef4 1228 (119 121) */ ld [%i1+32],%o0
+/* 0x0ef8 1229 (119 120) */ srlx %g2,32,%o1
+/* 0x0efc 1230 (119 120) */ add %g5,%g3,%g3
+/* 0x0f00 1231 (120 122) */ ldx [%sp+192],%o2
+/* 0x0f04 1232 (121 123) */ ldx [%sp+200],%g5
+/* 0x0f08 1233 (121 122) */ add %g3,%o0,%g3
+/* 0x0f0c 1234 (122 123) */ st %g4,[%i0+24]
+/* 0x0f10 1235 (122 123) */ sllx %o2,19,%g4
+/* 0x0f14 1236 (122 123) */ add %g3,%o1,%g3
+/* 0x0f18 1237 (123 125) */ ld [%i1+36],%o0
+/* 0x0f1c 1238 (123 124) */ srlx %g3,32,%o1
+/* 0x0f20 1239 (123 124) */ add %g5,%g4,%g4
+/* 0x0f24 1240 (124 126) */ ldx [%sp+176],%o2
+/* 0x0f28 1241 (125 127) */ ldx [%sp+184],%g5
+/* 0x0f2c 1242 (125 126) */ add %g4,%o0,%g4
+/* 0x0f30 1243 (126 127) */ st %g2,[%i0+28]
+/* 0x0f34 1244 (126 127) */ sllx %o2,19,%g2
+/* 0x0f38 1245 (126 127) */ add %g4,%o1,%g4
+/* 0x0f3c 1246 (127 129) */ ld [%i1+40],%o0
+/* 0x0f40 1247 (127 128) */ srlx %g4,32,%o1
+/* 0x0f44 1248 (127 128) */ add %g5,%g2,%g2
+/* 0x0f48 1249 (128 130) */ ldx [%sp+160],%o2
+/* 0x0f4c 1250 (129 131) */ ldx [%sp+168],%g5
+/* 0x0f50 1251 (129 130) */ add %g2,%o0,%g2
+/* 0x0f54 1252 (130 131) */ st %g3,[%i0+32]
+/* 0x0f58 1253 (130 131) */ sllx %o2,19,%g3
+/* 0x0f5c 1254 (130 131) */ add %g2,%o1,%g2
+/* 0x0f60 1255 (131 133) */ ld [%i1+44],%o0
+/* 0x0f64 1256 (131 132) */ srlx %g2,32,%o1
+/* 0x0f68 1257 (131 132) */ add %g5,%g3,%g3
+/* 0x0f6c 1258 (132 134) */ ldx [%sp+144],%o2
+/* 0x0f70 1259 (133 135) */ ldx [%sp+152],%g5
+/* 0x0f74 1260 (133 134) */ add %g3,%o0,%g3
+/* 0x0f78 1261 (134 135) */ st %g4,[%i0+36]
+/* 0x0f7c 1262 (134 135) */ sllx %o2,19,%g4
+/* 0x0f80 1263 (134 135) */ add %g3,%o1,%g3
+/* 0x0f84 1264 (135 137) */ ld [%i1+48],%o0
+/* 0x0f88 1265 (135 136) */ srlx %g3,32,%o1
+/* 0x0f8c 1266 (135 136) */ add %g5,%g4,%g4
+/* 0x0f90 1267 (136 138) */ ldx [%sp+128],%o2
+/* 0x0f94 1268 (137 139) */ ldx [%sp+136],%g5
+/* 0x0f98 1269 (137 138) */ add %g4,%o0,%g4
+/* 0x0f9c 1270 (138 139) */ std %f4,[%sp+96]
+/* 0x0fa0 1271 (138 139) */ add %g4,%o1,%g4
+/* 0x0fa4 1272 (139 140) */ st %g2,[%i0+40]
+/* 0x0fa8 1273 (139 140) */ sllx %o2,19,%g2
+/* 0x0fac 1274 (139 140) */ srlx %g4,32,%o1
+/* 0x0fb0 1275 (140 142) */ ld [%i1+52],%o0
+/* 0x0fb4 1276 (140 141) */ add %g5,%g2,%g2
+/* 0x0fb8 1277 (141 142) */ std %f6,[%sp+104]
+/* 0x0fbc 1278 (142 144) */ ldx [%sp+120],%g5
+/* 0x0fc0 1279 (142 143) */ add %g2,%o0,%g2
+/* 0x0fc4 1280 (143 144) */ st %g3,[%i0+44]
+/* 0x0fc8 1281 (143 144) */ add %g2,%o1,%g2
+/* 0x0fcc 1282 (144 146) */ ldx [%sp+112],%o2
+/* 0x0fd0 1283 (144 145) */ srlx %g2,32,%o1
+/* 0x0fd4 1284 (145 147) */ ld [%i1+56],%o0
+/* 0x0fd8 1285 (146 147) */ st %g4,[%i0+48]
+/* 0x0fdc 1286 (146 147) */ sllx %o2,19,%g3
+/* 0x0fe0 1287 (147 149) */ ldx [%sp+96],%o2
+/* 0x0fe4 1288 (147 148) */ add %g5,%g3,%g3
+/* 0x0fe8 1289 (148 150) */ ldx [%sp+104],%g5
+/* 0x0fec 1290 (148 149) */ add %g3,%o0,%g3
+/* 0x0ff0 1291 (149 151) */ ld [%i1+60],%o0
+/* 0x0ff4 1292 (149 150) */ sllx %o2,19,%g4
+/* 0x0ff8 1293 (149 150) */ add %g3,%o1,%g3
+/* 0x0ffc 1294 (150 151) */ st %g2,[%i0+52]
+/* 0x1000 1295 (150 151) */ srlx %g3,32,%o1
+/* 0x1004 1296 (150 151) */ add %g5,%g4,%g4
+/* 0x1008 1297 (151 152) */ st %g3,[%i0+56]
+/* 0x100c 1298 (151 152) */ add %g4,%o0,%g2
+/* 0x1010 1299 (152 153) */ add %g2,%o1,%g2
+/* 0x1014 1300 (152 153) */ st %g2,[%i0+60]
+/* 0x1018 1304 (153 154) */ srlx %g2,32,%o7
+
+!
+! ENTRY .L77000061
+!
+
+ .L77000061: /* frequency 1.0 confidence 0.0 */
+/* 0x119c 1437 ( 0 1) */ or %g0,%o7,%i0
+
+!
+! ENTRY .L900000159
+!
+
+ .L900000159: /* frequency 1.0 confidence 0.0 */
+/* 0x11a0 ( 0 7) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x11a4 ( 2 4) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+ .L77000073: /* frequency 1.0 confidence 0.0 */
+ or %g0, %i4, %o2
+ or %g0, %o0, %o1
+ or %g0, %i3, %o0
+
+!
+! ENTRY .L77000052
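+! (general-length large-multiplier path: the same 19-bit multiplier split,
+! with the fdtox'ed partial products staged into four interleaved stack
+! buffers below %fp before the integer accumulation passes further down)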
+!
+
+ .L77000052: /* frequency 1.0 confidence 0.0 */
+/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2
+/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+96]
+/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3
+/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14
+/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2
+/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+92]
+/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5
+/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2
+/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6
+/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1800),%g1
+/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2
+/* 0x1054 1337 ( 3 4) */ xor %g1,-304,%g1
+/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20
+/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3
+/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8
+/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3
+/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10
+/* 0x106c 1343 ( 5 7) */ ld [%sp+96],%f9
+/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0
+/* 0x1074 1345 ( 6 8) */ ld [%sp+92],%f11
+/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1800),%g1
+/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1
+/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18
+/* 0x1084 1349 ( 7 8) */ xor %g1,-296,%g1
+/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4
+/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16
+/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50
+/* 0x1094 ( 8 9) */ subcc %o0,0,%g0
+/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2
+/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1800),%g1
+/* 0x10a0 1356 (10 11) */ xor %g1,-288,%g1
+/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0
+/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7
+/* 0x10ac 1359 (11 12) */ sethi %hi(0x1800),%g1
+/* 0x10b0 1360 (12 13) */ xor %g1,-280,%g1
+/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4
+/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50
+/* 0x10bc (13 14) */ sub %o3,2,%o2
+/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2
+/* 0x10c4 1365 (14 15) */ add %o1,16,%g5
+/* 0x10c8 1366 (14 15) */ or %g0,4,%g4
+/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0
+/* 0x10d0 1368 (15 16) */ add %o1,8,%o1
+/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6
+/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4
+/* 0x10dc 1371 (16 17) */ add %o1,16,%o1
+/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12
+/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0
+/* 0x10e8 1374 (17 18) */ add %o1,8,%o1
+/* 0x10ec 1375 (18 21) */ fitod %f7,%f2
+/* 0x10f0 1376 (19 22) */ fitod %f6,%f6
+/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10
+/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8
+/* 0x1100 1380 (23 26) */ fitod %f13,%f4
+/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6
+/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000154
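+! (pipelined FP core, three packed pairs (six words) per pass: operands
+! are converted via fxnor with %f14 and fitod/fsubd against %f20, then
+! multiplied by the two multiplier halves in %f16/%f18, fdtox'ed, and
+! spilled to the four stack buffers)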
+!
+
+ .L990000154: /* frequency 1.0 confidence 0.0 */
+/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24
+/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4
+/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4
+/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22
+/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26
+/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0
+/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7
+/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28
+/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6
+/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2
+/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3
+/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0
+/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4
+/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2
+/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12
+/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6
+/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96]
+/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96]
+/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2
+/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6
+/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96]
+/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1
+/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12
+/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4
+/* 0x116c 1408 (10 11) */ std %f0,[%o4-96]
+/* 0x1170 1409 (11 14) */ ldd [%o1],%f0
+/* 0x1174 1410 (11 14) */ fitod %f9,%f2
+/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28
+/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24
+/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22
+/* 0x1184 1414 (13 16) */ fdtox %f4,%f4
+/* 0x1188 1415 (14 17) */ fitod %f10,%f6
+/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10
+/* 0x1190 1417 (15 18) */ fdtox %f24,%f24
+/* 0x1194 1418 (16 19) */ fdtox %f22,%f22
+/* 0x1198 1419 (16 17) */ std %f24,[%g3-64]
+/* 0x119c 1420 (17 18) */ std %f22,[%g2-64]
+/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10
+/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6
+/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64]
+/* 0x11ac 1424 (18 19) */ add %o1,8,%o1
+/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10
+/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0
+/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64]
+/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22
+/* 0x11c0 1429 (20 23) */ fitod %f13,%f4
+/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26
+/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24
+/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0
+/* 0x11d4 1434 (23 26) */ fitod %f8,%f6
+/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8
+/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26
+/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24
+/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32]
+/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32]
+/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8
+/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6
+/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32]
+/* 0x11f8 1443 (27 28) */ add %o1,8,%o1
+/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8
+/* 0x1200 1445 (28 29) */ std %f0,[%o4-32]
+/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50
+/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+ .L990000157: /* frequency 1.0 confidence 0.0 */
+/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28
+/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24
+/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3
+/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12
+/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26
+/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2
+/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4
+/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22
+/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7
+/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6
+/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128]
+/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4
+/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2
+/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0
+/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6
+/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24
+/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10
+/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128]
+/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10
+/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128]
+/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26
+/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10
+/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22
+/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12
+/* 0x1270 1474 (10 13) */ fdtox %f0,%f0
+/* 0x1274 1475 (10 11) */ std %f0,[%o4-128]
+/* 0x1278 1476 (11 14) */ fitod %f8,%f4
+/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6
+/* 0x1280 1478 (12 15) */ fdtox %f26,%f0
+/* 0x1284 1479 (12 13) */ std %f0,[%g3-96]
+/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10
+/* 0x128c 1481 (13 16) */ fdtox %f2,%f2
+/* 0x1290 1482 (13 14) */ std %f2,[%g2-96]
+/* 0x1294 1483 (14 17) */ fitod %f9,%f0
+/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2
+/* 0x129c 1485 (15 18) */ fdtox %f24,%f8
+/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96]
+/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4
+/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8
+/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12
+/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96]
+/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0
+/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6
+/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64]
+/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10
+/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64]
+/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6
+/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2
+/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64]
+/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4
+/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2
+/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8
+/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64]
+/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6
+/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32]
+/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0
+/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4
+/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32]
+/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2
+/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32]
+/* 0x1300 1510 (26 29) */ fdtox %f0,%f0
+/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50
+/* 0x1308 (26 27) */ std %f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+ .L77000054: /* frequency 1.0 confidence 0.0 */
+/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0
+
+!
+! ENTRY .L990000161
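+! (scalar cleanup of the FP core: one packed pair per pass, same
+! convert/multiply/spill sequence)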
+!
+
+ .L990000161: /* frequency 1.0 confidence 0.0 */
+/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4
+/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1
+/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0
+/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2
+/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0
+/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2
+/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0
+/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6
+/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4
+/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2
+/* 0x133c 1527 (11 14) */ fdtox %f6,%f6
+/* 0x1340 1528 (11 12) */ std %f6,[%g3]
+/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0
+/* 0x1348 1530 (12 15) */ fdtox %f4,%f4
+/* 0x134c 1531 (12 13) */ std %f4,[%g2]
+/* 0x1350 1532 (12 13) */ add %g2,32,%g2
+/* 0x1354 1533 (13 16) */ fdtox %f2,%f2
+/* 0x1358 1534 (13 14) */ std %f2,[%o7]
+/* 0x135c 1535 (13 14) */ add %o7,32,%o7
+/* 0x1360 1536 (14 17) */ fdtox %f0,%f0
+/* 0x1364 1537 (14 15) */ std %f0,[%o4]
+/* 0x1368 1538 (14 15) */ add %o4,32,%o4
+/* 0x136c 1539 (15 16) */ add %g3,32,%g3
+/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50
+/* 0x1374 (16 19) */ ldd [%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+ .L77000056: /* frequency 1.0 confidence 0.0 */
+/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+ .L990000162: /* frequency 1.0 confidence 0.0 */
+/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50
+/* 0x1380 ( 0 1) */ nop
+/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x1388 1556 ( 1 2) */ xor %g1,-304,%g1
+/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4
+/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5
+/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1800),%g1
+/* 0x1398 1560 ( 3 4) */ xor %g1,-296,%g1
+/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7
+/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2
+/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2
+/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3
+/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0
+/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50
+/* 0x13b4 ( 6 7) */ sethi %hi(0x1800),%g1
+/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2
+/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3
+/* 0x13c0 1570 ( 7 8) */ xor %g1,-264,%g1
+/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4
+/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2
+/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1800),%g1
+/* 0x13d0 1574 ( 9 10) */ xor %g1,-272,%g1
+/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2
+/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5
+/* 0x13dc 1577 (10 11) */ sethi %hi(0x1800),%g1
+/* 0x13e0 1578 (11 12) */ xor %g1,-296,%g1
+/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1
+/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1
+/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0
+/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1
+/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3
+/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0
+/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1
+/* 0x1400 1586 (16 17) */ add %g4,8,%g4
+/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3
+/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0
+/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2
+/* 0x1410 1590 (18 19) */ st %o0,[%g3-4]
+/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000142
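+! (integer accumulation, four result words per pass: sllx ...,19
+! recombines the low/high product halves from the stack buffers, adds the
+! %g4 source words and the carry from srlx ...,32, and stores through %g3)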
+!
+
+ .L990000142: /* frequency 1.0 confidence 0.0 */
+/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2
+/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2
+/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3
+/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5
+/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1
+/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0
+/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2
+/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0
+/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1
+/* 0x143c 1602 ( 4 5) */ st %o1,[%g3]
+/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5
+/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0
+/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1
+/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0
+/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3
+/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2
+/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0
+/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3
+/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1
+/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0
+/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12]
+/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5
+/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4
+/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0
+/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1
+/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2
+/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3
+/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2
+/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1
+/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0
+/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2
+/* 0x1494 1624 (12 13) */ st %o2,[%g3-8]
+/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5
+/* 0x149c 1626 (12 13) */ add %g5,64,%g5
+/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2
+/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0
+/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1
+/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0
+/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3
+/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2
+/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0
+/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4]
+/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50
+/* 0x14c4 (16 17) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+ .L990000145: /* frequency 1.0 confidence 0.0 */
+/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3
+/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3
+/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2
+/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0
+/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0
+/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4]
+/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0
+/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50
+/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+ .L77000058: /* frequency 1.0 confidence 0.0 */
+/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2
+
+!
+! ENTRY .L990000160
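+! (one-word cleanup tail of the same accumulation; the final carry leaves
+! in %o5 for the restore below)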
+!
+
+ .L990000160: /* frequency 1.0 confidence 0.0 */
+/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3
+/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0
+/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2
+/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1
+/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2
+/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2
+/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0
+/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5
+/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0
+/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4
+/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0
+/* 0x151c 1661 ( 4 5) */ st %o0,[%g3]
+/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0
+/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5
+/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3
+/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50
+/* 0x1530 ( 6 8) */ ldx [%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+ .L77770061: /* frequency 1.0 confidence 0.0 */
+/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+/* 0x11a8 1441 ( 0 0) */ .type mul_add,2
+/* 0x11a8 1442 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x11a8 1445 ( 0 0) */ .align 16
+/* 0x11b0 1451 ( 0 0) */ .global mul_add_inp
+
+!
+! ENTRY mul_add_inp
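+! (in-place wrapper: the or-chain below re-shuffles the incoming arguments
+! so the first one doubles as source and destination, roughly
+! mul_add(x, x, y, n, d), then tail-calls mul_add with %o7 preserved so
+! mul_add returns directly to the original caller)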
+!
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */
+/* 0x11b0 1453 ( 0 1) */ or %g0,%o2,%g1
+/* 0x11b4 1454 ( 0 1) */ or %g0,%o3,%o4
+/* 0x11b8 1455 ( 1 2) */ or %g0,%o0,%g3
+/* 0x11bc 1456 ( 1 2) */ or %g0,%o1,%g2
+/* 0x11c0 1466 ( 2 3) */ or %g0,%g1,%o3
+/* 0x11c4 1467 ( 2 3) */ or %g0,%g3,%o1
+/* 0x11c8 1468 ( 3 4) */ or %g0,%g2,%o2
+/* 0x11cc 1469 ( 3 4) */ or %g0,%o7,%g1
+/* 0x11d0 1470 ( 4 6) */ call mul_add ! params = ! Result =
+/* 0x11d4 ( 5 6) */ or %g0,%g1,%o7
+/* 0x11d8 1472 ( 0 0) */ .type mul_add_inp,2
+/* 0x11d8 1473 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp)
+
+ .section ".data",#alloc,#write
+/* 0x11d8 6 ( 0 0) */ .align 8
+
+!
+! ENTRY mask_cnst
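+! (two words of 0x80000000: fxnor'ed against packed 32-bit operands so the
+! signed fitod conversion can be applied to unsigned values; the constant
+! bias this introduces is cancelled later in the integer accumulation)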
+!
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */
+/* 0x11d8 8 ( 0 0) */ .word -2147483648
+/* 0x11dc 9 ( 0 0) */ .word -2147483648
+/* 0x11e0 10 ( 0 0) */ .type mask_cnst,#object
+/* 0x11e0 11 ( 0 0) */ .size mask_cnst,8
+
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
new file mode 100644
index 0000000000..e2fbe0bd00
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
@@ -0,0 +1,1645 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .register %g2,#scratch
+/* 000000 ( 0 0) */ .register %g3,#scratch
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 15 ( 0 0) */ .align 8
+!
+! SUBROUTINE .L_const_seg_900000101
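+! (FP constants: the first double is 2^52, bit pattern 0x4330000000000000,
+! the usual bias for assembling a 32-bit integer into a double with fsubd;
+! judging from its use below, the second, 0x41DFFFFFFFC00000, is the
+! minuend the fxnor'ed, fitod'ed operands are subtracted from)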
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000101: /* frequency 1.0 confidence 0.0 */
+/* 000000 20 ( 0 0) */ .word 1127219200,0
+/* 0x0008 21 ( 0 0) */ .word 1105199103,-4194304
+/* 0x0010 22 ( 0 0) */ .align 8
+/* 0x0010 28 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
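+! (multiply-and-add over 32-bit words, done in double-precision FP:
+! operands are converted with the fxnor trick, multiplied by the converted
+! multiplier (split into 19/13-bit halves when it is 2^19 or larger, see
+! the %hi(0x80000) test below), and the fdtox'ed products are accumulated
+! with explicit carries; lengths of 8 and 16 words get straight-line paths)
+!
+! A rough C model of the semantics this code appears to implement; the
+! prototype and names are hypothetical, inferred from the register usage
+! here and in mul_add_inp, not taken from the original mpv_sparc.c:
+!
+!     #include <stdint.h>
+!
+!     uint32_t mul_add(uint32_t *rc, const uint32_t *a,
+!                      const uint32_t *b, int n, uint32_t d)
+!     {
+!         uint64_t t;
+!         uint32_t carry = 0;
+!         int i;
+!
+!         for (i = 0; i < n; i++) {     /* rc[] = a[] + b[] * d */
+!             t = (uint64_t)b[i] * d + a[i] + carry;
+!             rc[i] = (uint32_t)t;      /* low 32 bits of the column */
+!             carry = (uint32_t)(t >> 32);
+!         }
+!         return carry;                 /* final carry word */
+!     }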
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 30 ( 0 1) */ sethi %hi(0x1c00),%g1
+/* 0x0014 31 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 32 ( 1 2) */ xor %g1,-48,%g1
+/* 0x001c 33 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 34 ( 2 3) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000149
+!
+
+ .L900000149: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 36 ( 0 2) */ call (.+0x8) ! params = ! Result =
+/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x002c 178 ( 2 3) */ sethi %hi(.L_const_seg_900000101),%g3
+/* 0x0030 179 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x0034 180 ( 3 4) */ add %g3,%lo(.L_const_seg_900000101),%g3
+/* 0x0038 181 ( 3 4) */ add %g5,%o7,%o1
+/* 0x003c 182 ( 4 5) */ sethi %hi(0x80000),%g4
+/* 0x0040 183 ( 4 6) */ ldx [%o1+%g2],%g2
+/* 0x0044 184 ( 4 5) */ or %g0,%i2,%o2
+/* 0x0048 185 ( 5 6) */ subcc %i4,%g4,%g0
+/* 0x004c 186 ( 5 7) */ ldx [%o1+%g3],%o0
+/* 0x0050 187 ( 6 7) */ or %g0,%i0,%o7
+/* 0x0054 188 ( 6 7) */ or %g0,%i1,%o5
+/* 0x0058 189 ( 6 9) */ ldd [%g2],%f0
+/* 0x005c 190 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50
+/* 0x0060 ( 7 8) */ subcc %i3,8,%g0
+/* 0x0064 192 ( 7 8) */ bne,pn %icc,.L900000158 ! tprob=0.50
+/* 0x0068 ( 8 9) */ subcc %i3,16,%g0
+/* 0x006c 194 ( 9 12) */ ldd [%o2],%f4
+/* 0x0070 195 (10 11) */ st %i4,[%sp+2287]
+/* 0x0074 196 (11 14) */ ldd [%o0],%f8
+/* 0x0078 197 (11 13) */ fxnor %f0,%f4,%f4
+/* 0x007c 198 (12 15) */ ldd [%o2+8],%f10
+/* 0x0080 199 (13 16) */ fitod %f4,%f12
+/* 0x0084 200 (13 16) */ ldd [%o0+8],%f14
+/* 0x0088 201 (14 17) */ ld [%sp+2287],%f7
+/* 0x008c 202 (14 17) */ fitod %f5,%f4
+/* 0x0090 203 (15 17) */ fxnor %f0,%f10,%f10
+/* 0x0094 204 (15 18) */ ldd [%o2+16],%f16
+/* 0x0098 205 (16 19) */ ldd [%o2+24],%f18
+/* 0x009c 206 (17 20) */ fsubd %f14,%f4,%f4
+/* 0x00a0 210 (17 20) */ ld [%i1],%g2
+/* 0x00a4 211 (18 20) */ fxnor %f0,%f16,%f16
+/* 0x00a8 212 (18 21) */ ld [%i1+4],%g3
+/* 0x00ac 213 (19 22) */ ld [%i1+8],%g4
+/* 0x00b0 214 (20 23) */ fitod %f16,%f20
+/* 0x00b4 215 (20 23) */ ld [%i1+16],%o0
+/* 0x00b8 216 (21 24) */ ld [%i1+12],%g5
+/* 0x00bc 217 (22 25) */ ld [%i1+20],%o1
+/* 0x00c0 218 (23 26) */ ld [%i1+24],%o2
+/* 0x00c4 219 (24 25) */ fmovs %f8,%f6
+/* 0x00c8 220 (24 27) */ ld [%i1+28],%o3
+/* 0x00cc 221 (26 29) */ fsubd %f6,%f8,%f6
+/* 0x00d0 222 (27 30) */ fsubd %f14,%f12,%f8
+/* 0x00d4 223 (28 31) */ fitod %f10,%f12
+/* 0x00d8 224 (29 32) */ fmuld %f4,%f6,%f4
+/* 0x00dc 225 (29 32) */ fitod %f11,%f10
+/* 0x00e0 226 (30 33) */ fmuld %f8,%f6,%f8
+/* 0x00e4 227 (31 34) */ fsubd %f14,%f12,%f12
+/* 0x00e8 228 (32 35) */ fdtox %f4,%f4
+/* 0x00ec 229 (32 33) */ std %f4,[%sp+2271]
+/* 0x00f0 230 (33 36) */ fdtox %f8,%f8
+/* 0x00f4 231 (33 34) */ std %f8,[%sp+2279]
+/* 0x00f8 232 (34 37) */ fmuld %f12,%f6,%f12
+/* 0x00fc 233 (34 37) */ fsubd %f14,%f10,%f10
+/* 0x0100 234 (35 38) */ fsubd %f14,%f20,%f4
+/* 0x0104 235 (36 39) */ fitod %f17,%f8
+/* 0x0108 236 (37 39) */ fxnor %f0,%f18,%f16
+/* 0x010c 237 (37 39) */ ldx [%sp+2279],%o4
+/* 0x0110 238 (37 40) */ fmuld %f10,%f6,%f10
+/* 0x0114 239 (38 41) */ fdtox %f12,%f12
+/* 0x0118 240 (38 39) */ std %f12,[%sp+2263]
+/* 0x011c 241 (38 41) */ fmuld %f4,%f6,%f4
+/* 0x0120 242 (39 42) */ fitod %f16,%f18
+/* 0x0124 243 (39 40) */ add %o4,%g2,%g2
+/* 0x0128 244 (39 40) */ st %g2,[%i0]
+/* 0x012c 245 (40 42) */ ldx [%sp+2271],%o4
+/* 0x0130 246 (40 43) */ fsubd %f14,%f8,%f8
+/* 0x0134 247 (40 41) */ srax %g2,32,%o5
+/* 0x0138 248 (41 44) */ fdtox %f10,%f10
+/* 0x013c 249 (41 42) */ std %f10,[%sp+2255]
+/* 0x0140 250 (42 45) */ fdtox %f4,%f4
+/* 0x0144 251 (42 43) */ std %f4,[%sp+2247]
+/* 0x0148 252 (42 43) */ add %o4,%g3,%o4
+/* 0x014c 253 (43 46) */ fitod %f17,%f12
+/* 0x0150 254 (43 45) */ ldx [%sp+2263],%g2
+/* 0x0154 255 (43 44) */ add %o4,%o5,%g3
+/* 0x0158 256 (43 46) */ fmuld %f8,%f6,%f8
+/* 0x015c 257 (44 47) */ fsubd %f14,%f18,%f10
+/* 0x0160 258 (44 45) */ st %g3,[%i0+4]
+/* 0x0164 259 (44 45) */ srax %g3,32,%g3
+/* 0x0168 260 (45 46) */ add %g2,%g4,%g4
+/* 0x016c 261 (45 47) */ ldx [%sp+2255],%g2
+/* 0x0170 262 (46 49) */ fsubd %f14,%f12,%f4
+/* 0x0174 263 (46 47) */ add %g4,%g3,%g3
+/* 0x0178 264 (46 48) */ ldx [%sp+2247],%g4
+/* 0x017c 265 (47 50) */ fmuld %f10,%f6,%f10
+/* 0x0180 266 (47 50) */ fdtox %f8,%f8
+/* 0x0184 267 (47 48) */ std %f8,[%sp+2239]
+/* 0x0188 268 (48 49) */ add %g4,%o0,%g4
+/* 0x018c 269 (48 49) */ add %g2,%g5,%g2
+/* 0x0190 270 (48 49) */ st %g3,[%i0+8]
+/* 0x0194 271 (49 52) */ fmuld %f4,%f6,%f4
+/* 0x0198 272 (49 50) */ srax %g3,32,%o0
+/* 0x019c 273 (49 51) */ ldx [%sp+2239],%g5
+/* 0x01a0 274 (50 53) */ fdtox %f10,%f6
+/* 0x01a4 275 (50 51) */ std %f6,[%sp+2231]
+/* 0x01a8 276 (50 51) */ add %g2,%o0,%g2
+/* 0x01ac 277 (51 52) */ srax %g2,32,%g3
+/* 0x01b0 278 (51 52) */ add %g5,%o1,%o1
+/* 0x01b4 279 (51 52) */ st %g2,[%i0+12]
+/* 0x01b8 280 (52 55) */ fdtox %f4,%f4
+/* 0x01bc 281 (52 53) */ std %f4,[%sp+2223]
+/* 0x01c0 282 (52 53) */ add %g4,%g3,%g3
+/* 0x01c4 283 (53 54) */ srax %g3,32,%g4
+/* 0x01c8 284 (53 54) */ st %g3,[%i0+16]
+/* 0x01cc 285 (54 56) */ ldx [%sp+2231],%o0
+/* 0x01d0 286 (54 55) */ add %o1,%g4,%g4
+/* 0x01d4 287 (55 56) */ srax %g4,32,%g2
+/* 0x01d8 288 (55 57) */ ldx [%sp+2223],%g5
+/* 0x01dc 289 (56 57) */ add %o0,%o2,%o2
+/* 0x01e0 290 (56 57) */ st %g4,[%i0+20]
+/* 0x01e4 291 (57 58) */ add %o2,%g2,%g2
+/* 0x01e8 292 (57 58) */ add %g5,%o3,%g5
+/* 0x01ec 293 (57 58) */ st %g2,[%i0+24]
+/* 0x01f0 294 (58 59) */ srax %g2,32,%g3
+/* 0x01f4 295 (59 60) */ add %g5,%g3,%g2
+/* 0x01f8 296 (59 60) */ st %g2,[%i0+28]
+/* 0x01fc 300 (60 61) */ srax %g2,32,%o3
+/* 0x0200 301 (61 62) */ srl %o3,0,%i0
+/* 0x0204 (62 64) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0208 (64 65) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L900000158
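+! (length was not 8; the flags still hold length-16: exactly 16 words are
+! handled inline here, anything else branches on to .L900000157)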
+!
+
+ .L900000158: /* frequency 1.0 confidence 0.0 */
+/* 0x020c 308 ( 0 1) */ bne,a,pn %icc,.L900000157 ! tprob=0.50
+/* 0x0210 ( 0 1) */ st %i4,[%sp+2223]
+/* 0x0214 315 ( 1 4) */ ldd [%o2],%f4
+/* 0x0218 316 ( 2 3) */ st %i4,[%sp+2351]
+/* 0x021c 317 ( 3 6) */ ldd [%o0],%f8
+/* 0x0220 318 ( 3 5) */ fxnor %f0,%f4,%f4
+/* 0x0224 319 ( 4 7) */ ldd [%o2+8],%f10
+/* 0x0228 320 ( 5 8) */ ldd [%o0+8],%f14
+/* 0x022c 321 ( 5 8) */ fitod %f4,%f12
+/* 0x0230 322 ( 6 9) */ ld [%sp+2351],%f7
+/* 0x0234 323 ( 6 8) */ fxnor %f0,%f10,%f10
+/* 0x0238 324 ( 7 10) */ ldd [%o2+16],%f16
+/* 0x023c 325 ( 7 10) */ fitod %f5,%f4
+/* 0x0240 326 ( 8 11) */ ldd [%o2+24],%f18
+/* 0x0244 330 ( 9 12) */ ldd [%o2+32],%f20
+/* 0x0248 331 ( 9 11) */ fxnor %f0,%f16,%f16
+/* 0x024c 335 (10 13) */ ld [%i1],%g2
+/* 0x0250 336 (10 13) */ fsubd %f14,%f4,%f4
+/* 0x0254 337 (11 14) */ ldd [%o2+40],%f22
+/* 0x0258 338 (11 14) */ fitod %f16,%f28
+/* 0x025c 339 (12 15) */ ld [%i1+4],%g3
+/* 0x0260 340 (13 16) */ ld [%i1+8],%g4
+/* 0x0264 341 (13 15) */ fxnor %f0,%f22,%f22
+/* 0x0268 342 (14 17) */ ld [%i1+12],%g5
+/* 0x026c 343 (15 18) */ ld [%i1+16],%o0
+/* 0x0270 344 (16 19) */ ldd [%o2+48],%f24
+/* 0x0274 345 (17 20) */ ld [%i1+20],%o1
+/* 0x0278 346 (17 18) */ fmovs %f8,%f6
+/* 0x027c 347 (18 21) */ ldd [%o2+56],%f26
+/* 0x0280 348 (19 22) */ ld [%i1+24],%o2
+/* 0x0284 349 (19 22) */ fsubd %f6,%f8,%f6
+/* 0x0288 350 (20 23) */ ld [%i1+28],%o3
+/* 0x028c 351 (20 23) */ fsubd %f14,%f12,%f8
+/* 0x0290 355 (21 24) */ ld [%i1+32],%o4
+/* 0x0294 356 (21 24) */ fitod %f10,%f12
+/* 0x0298 357 (22 25) */ ld [%i1+36],%o7
+/* 0x029c 358 (22 25) */ fitod %f11,%f10
+/* 0x02a0 359 (22 25) */ fmuld %f4,%f6,%f4
+/* 0x02a4 360 (23 26) */ ld [%i1+40],%l1
+/* 0x02a8 361 (23 26) */ fmuld %f8,%f6,%f8
+/* 0x02ac 362 (24 27) */ ld [%i1+56],%l5
+/* 0x02b0 363 (24 27) */ fsubd %f14,%f12,%f12
+/* 0x02b4 364 (25 28) */ fsubd %f14,%f10,%f10
+/* 0x02b8 365 (26 29) */ fdtox %f8,%f8
+/* 0x02bc 366 (26 27) */ std %f8,[%sp+2343]
+/* 0x02c0 367 (27 30) */ fitod %f17,%f8
+/* 0x02c4 368 (27 30) */ fmuld %f12,%f6,%f12
+/* 0x02c8 369 (28 31) */ fdtox %f4,%f4
+/* 0x02cc 370 (28 29) */ std %f4,[%sp+2335]
+/* 0x02d0 371 (28 31) */ fmuld %f10,%f6,%f10
+/* 0x02d4 372 (29 31) */ fxnor %f0,%f18,%f16
+/* 0x02d8 373 (30 33) */ fdtox %f12,%f12
+/* 0x02dc 374 (30 31) */ std %f12,[%sp+2327]
+/* 0x02e0 375 (31 33) */ ldx [%sp+2343],%o5
+/* 0x02e4 376 (31 34) */ fsubd %f14,%f8,%f8
+/* 0x02e8 377 (32 35) */ fsubd %f14,%f28,%f4
+/* 0x02ec 378 (33 36) */ fitod %f17,%f12
+/* 0x02f0 379 (33 34) */ add %o5,%g2,%g2
+/* 0x02f4 380 (33 34) */ st %g2,[%i0]
+/* 0x02f8 381 (34 36) */ ldx [%sp+2335],%o5
+/* 0x02fc 382 (34 37) */ fitod %f16,%f18
+/* 0x0300 383 (34 35) */ srax %g2,32,%l0
+/* 0x0304 384 (35 37) */ fxnor %f0,%f20,%f16
+/* 0x0308 385 (35 38) */ fmuld %f8,%f6,%f20
+/* 0x030c 386 (36 39) */ fdtox %f10,%f10
+/* 0x0310 387 (36 37) */ std %f10,[%sp+2319]
+/* 0x0314 388 (36 37) */ add %o5,%g3,%g3
+/* 0x0318 389 (36 39) */ fmuld %f4,%f6,%f4
+/* 0x031c 390 (37 40) */ fitod %f16,%f8
+/* 0x0320 391 (37 38) */ add %g3,%l0,%g3
+/* 0x0324 392 (37 38) */ st %g3,[%i0+4]
+/* 0x0328 393 (38 40) */ ldx [%sp+2327],%o5
+/* 0x032c 394 (38 41) */ fsubd %f14,%f18,%f18
+/* 0x0330 395 (38 39) */ srax %g3,32,%l3
+/* 0x0334 396 (39 41) */ ldx [%sp+2319],%l2
+/* 0x0338 397 (39 42) */ fdtox %f4,%f4
+/* 0x033c 398 (40 41) */ std %f4,[%sp+2311]
+/* 0x0340 399 (40 43) */ fdtox %f20,%f20
+/* 0x0344 400 (40 41) */ add %o5,%g4,%g4
+/* 0x0348 401 (41 42) */ std %f20,[%sp+2303]
+/* 0x034c 402 (41 44) */ fsubd %f14,%f12,%f4
+/* 0x0350 403 (41 42) */ add %g4,%l3,%g4
+/* 0x0354 404 (41 44) */ fmuld %f18,%f6,%f18
+/* 0x0358 405 (42 43) */ st %g4,[%i0+8]
+/* 0x035c 406 (42 45) */ fitod %f17,%f16
+/* 0x0360 407 (42 43) */ srax %g4,32,%l4
+/* 0x0364 408 (43 46) */ ld [%i1+44],%l0
+/* 0x0368 409 (43 46) */ fsubd %f14,%f8,%f20
+/* 0x036c 410 (43 44) */ add %l2,%g5,%l2
+/* 0x0370 411 (44 46) */ ldx [%sp+2311],%g5
+/* 0x0374 412 (44 47) */ fitod %f22,%f8
+/* 0x0378 413 (44 45) */ add %l2,%l4,%l2
+/* 0x037c 414 (44 47) */ fmuld %f4,%f6,%f4
+/* 0x0380 415 (45 46) */ st %l2,[%i0+12]
+/* 0x0384 416 (45 48) */ fsubd %f14,%f16,%f10
+/* 0x0388 417 (46 49) */ ld [%i1+52],%l3
+/* 0x038c 418 (46 49) */ fdtox %f18,%f18
+/* 0x0390 419 (46 47) */ add %g5,%o0,%l4
+/* 0x0394 420 (46 49) */ fmuld %f20,%f6,%f12
+/* 0x0398 421 (47 48) */ std %f18,[%sp+2295]
+/* 0x039c 422 (47 48) */ srax %l2,32,%o0
+/* 0x03a0 423 (47 50) */ fitod %f23,%f16
+/* 0x03a4 424 (48 51) */ ld [%i1+48],%o5
+/* 0x03a8 425 (48 51) */ fsubd %f14,%f8,%f8
+/* 0x03ac 426 (48 49) */ add %l4,%o0,%l4
+/* 0x03b0 427 (49 50) */ st %l4,[%i0+16]
+/* 0x03b4 428 (49 50) */ srax %l4,32,%o0
+/* 0x03b8 429 (49 51) */ fxnor %f0,%f24,%f18
+/* 0x03bc 430 (50 52) */ ldx [%sp+2303],%g5
+/* 0x03c0 431 (50 53) */ fdtox %f4,%f4
+/* 0x03c4 432 (51 52) */ std %f4,[%sp+2287]
+/* 0x03c8 433 (51 54) */ fdtox %f12,%f12
+/* 0x03cc 434 (51 54) */ fmuld %f10,%f6,%f4
+/* 0x03d0 435 (52 53) */ std %f12,[%sp+2279]
+/* 0x03d4 436 (52 55) */ fsubd %f14,%f16,%f12
+/* 0x03d8 437 (52 53) */ add %g5,%o1,%g2
+/* 0x03dc 438 (52 55) */ fmuld %f8,%f6,%f8
+/* 0x03e0 439 (53 55) */ ldx [%sp+2295],%g5
+/* 0x03e4 440 (53 56) */ fitod %f18,%f10
+/* 0x03e8 441 (53 54) */ add %g2,%o0,%g2
+/* 0x03ec 442 (54 55) */ st %g2,[%i0+20]
+/* 0x03f0 443 (54 57) */ fitod %f19,%f16
+/* 0x03f4 444 (54 55) */ srax %g2,32,%o0
+/* 0x03f8 445 (55 58) */ fdtox %f8,%f8
+/* 0x03fc 446 (55 56) */ std %f8,[%sp+2263]
+/* 0x0400 447 (55 56) */ add %g5,%o2,%g3
+/* 0x0404 448 (56 58) */ ldx [%sp+2287],%g5
+/* 0x0408 449 (56 59) */ fsubd %f14,%f10,%f10
+/* 0x040c 450 (56 57) */ add %g3,%o0,%g3
+/* 0x0410 451 (57 58) */ st %g3,[%i0+24]
+/* 0x0414 452 (57 60) */ fsubd %f14,%f16,%f8
+/* 0x0418 453 (57 58) */ srax %g3,32,%o0
+/* 0x041c 454 (58 61) */ fdtox %f4,%f4
+/* 0x0420 455 (58 59) */ std %f4,[%sp+2271]
+/* 0x0424 456 (58 59) */ add %g5,%o3,%g4
+/* 0x0428 457 (59 61) */ fxnor %f0,%f26,%f18
+/* 0x042c 458 (59 62) */ fmuld %f12,%f6,%f4
+/* 0x0430 459 (59 60) */ add %g4,%o0,%g4
+/* 0x0434 460 (60 61) */ st %g4,[%i0+28]
+/* 0x0438 461 (60 63) */ fmuld %f10,%f6,%f10
+/* 0x043c 462 (60 61) */ srax %g4,32,%o0
+/* 0x0440 463 (61 63) */ ldx [%sp+2279],%g5
+/* 0x0444 464 (61 64) */ fitod %f18,%f12
+/* 0x0448 465 (61 64) */ fmuld %f8,%f6,%f8
+/* 0x044c 466 (62 65) */ fdtox %f4,%f4
+/* 0x0450 467 (62 63) */ std %f4,[%sp+2255]
+/* 0x0454 468 (63 64) */ add %g5,%o4,%l2
+/* 0x0458 469 (63 65) */ ldx [%sp+2271],%g5
+/* 0x045c 470 (63 66) */ fdtox %f10,%f16
+/* 0x0460 471 (64 67) */ fsubd %f14,%f12,%f4
+/* 0x0464 472 (64 65) */ std %f16,[%sp+2247]
+/* 0x0468 473 (64 65) */ add %l2,%o0,%l2
+/* 0x046c 474 (65 68) */ fdtox %f8,%f8
+/* 0x0470 475 (65 66) */ std %f8,[%sp+2239]
+/* 0x0474 476 (65 66) */ add %g5,%o7,%l4
+/* 0x0478 477 (66 69) */ fitod %f19,%f10
+/* 0x047c 478 (66 68) */ ldx [%sp+2263],%g5
+/* 0x0480 479 (66 67) */ srax %l2,32,%o0
+/* 0x0484 480 (67 68) */ add %l4,%o0,%l4
+/* 0x0488 481 (67 70) */ fmuld %f4,%f6,%f4
+/* 0x048c 482 (67 69) */ ldx [%sp+2255],%o0
+/* 0x0490 483 (68 69) */ srax %l4,32,%o1
+/* 0x0494 484 (68 69) */ add %g5,%l1,%l1
+/* 0x0498 485 (68 69) */ st %l2,[%i0+32]
+/* 0x049c 486 (69 72) */ fsubd %f14,%f10,%f8
+/* 0x04a0 487 (69 71) */ ldx [%sp+2239],%o3
+/* 0x04a4 488 (69 70) */ add %l1,%o1,%o1
+/* 0x04a8 489 (70 72) */ ldx [%sp+2247],%g5
+/* 0x04ac 490 (70 71) */ srax %o1,32,%o2
+/* 0x04b0 491 (70 71) */ add %o0,%l0,%o0
+/* 0x04b4 492 (71 74) */ fdtox %f4,%f4
+/* 0x04b8 493 (71 72) */ std %f4,[%sp+2231]
+/* 0x04bc 494 (71 72) */ add %o0,%o2,%o2
+/* 0x04c0 495 (72 73) */ add %o3,%l3,%l3
+/* 0x04c4 496 (72 75) */ fmuld %f8,%f6,%f4
+/* 0x04c8 497 (72 73) */ add %g5,%o5,%g5
+/* 0x04cc 498 (73 74) */ srax %o2,32,%o3
+/* 0x04d0 499 (73 74) */ st %l4,[%i0+36]
+/* 0x04d4 500 (74 75) */ add %g5,%o3,%g2
+/* 0x04d8 501 (74 76) */ ldx [%sp+2231],%o0
+/* 0x04dc 502 (75 76) */ srax %g2,32,%g3
+/* 0x04e0 503 (75 78) */ fdtox %f4,%f4
+/* 0x04e4 504 (75 76) */ std %f4,[%sp+2223]
+/* 0x04e8 505 (76 77) */ st %o1,[%i0+40]
+/* 0x04ec 506 (76 77) */ add %l3,%g3,%g3
+/* 0x04f0 507 (76 77) */ add %o0,%l5,%g5
+/* 0x04f4 508 (77 78) */ st %o2,[%i0+44]
+/* 0x04f8 509 (77 78) */ srax %g3,32,%g4
+/* 0x04fc 510 (78 79) */ st %g2,[%i0+48]
+/* 0x0500 511 (78 79) */ add %g5,%g4,%g4
+/* 0x0504 512 (79 80) */ st %g3,[%i0+52]
+/* 0x0508 513 (79 80) */ srax %g4,32,%g5
+/* 0x050c 514 (80 83) */ ld [%i1+60],%g3
+/* 0x0510 515 (81 83) */ ldx [%sp+2223],%g2
+/* 0x0514 516 (82 83) */ st %g4,[%i0+56]
+/* 0x0518 517 (83 84) */ add %g2,%g3,%g2
+/* 0x051c 518 (84 85) */ add %g2,%g5,%g2
+/* 0x0520 519 (84 85) */ st %g2,[%i0+60]
+/* 0x0524 523 (85 86) */ srax %g2,32,%o3
+/* 0x0528 524 (86 87) */ srl %o3,0,%i0
+/* 0x052c (87 89) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0530 (89 90) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L900000157
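+! (general-length path for small multipliers: products are staged in a
+! stack area based at %fp-217, a v9-biased address, and the subcc tests
+! below pick between the unrolled loop that follows and the shorter
+! cleanup code at .L77000077/.L900000156 further down)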
+!
+
+ .L900000157: /* frequency 1.0 confidence 0.0 */
+/* 0x0534 532 ( 0 1) */ fmovd %f0,%f14
+/* 0x0538 533 ( 0 3) */ ldd [%o0],%f8
+/* 0x053c 539 ( 0 1) */ add %i3,1,%g2
+/* 0x0540 540 ( 1 4) */ ld [%sp+2223],%f7
+/* 0x0544 541 ( 1 2) */ srl %g2,31,%g3
+/* 0x0548 545 ( 1 2) */ add %fp,-217,%g4
+/* 0x054c 546 ( 2 3) */ add %g2,%g3,%g2
+/* 0x0550 547 ( 2 3) */ or %g0,0,%g5
+/* 0x0554 548 ( 2 5) */ ldd [%o0+8],%f18
+/* 0x0558 549 ( 3 4) */ fmovs %f8,%f6
+/* 0x055c 550 ( 3 4) */ sra %g2,1,%o1
+/* 0x0560 551 ( 3 4) */ or %g0,0,%o0
+/* 0x0564 552 ( 4 5) */ subcc %o1,0,%g0
+/* 0x0568 553 ( 5 6) */ or %g0,%o1,%o3
+/* 0x056c 554 ( 5 8) */ fsubd %f6,%f8,%f16
+/* 0x0570 555 ( 5 6) */ ble,pt %icc,.L900000156 ! tprob=0.50
+/* 0x0574 ( 6 7) */ subcc %i3,0,%g0
+/* 0x0578 557 ( 6 7) */ sub %o1,1,%g2
+/* 0x057c 558 ( 7 8) */ or %g0,0,%i0
+/* 0x0580 559 ( 7 8) */ or %g0,1,%g3
+/* 0x0584 560 ( 8 9) */ subcc %o3,10,%g0
+/* 0x0588 561 ( 8 9) */ bl,pn %icc,.L77000077 ! tprob=0.50
+/* 0x058c ( 9 10) */ or %g0,0,%o1
+/* 0x0590 563 ( 9 12) */ ldd [%i2+8],%f0
+/* 0x0594 564 ( 9 10) */ sub %o3,3,%o3
+/* 0x0598 565 (10 13) */ ldd [%i2],%f2
+/* 0x059c 566 (10 11) */ or %g0,7,%o0
+/* 0x05a0 567 (10 11) */ or %g0,2,%i0
+/* 0x05a4 568 (11 13) */ fxnor %f14,%f0,%f8
+/* 0x05a8 569 (11 14) */ ldd [%i2+16],%f4
+/* 0x05ac 570 (11 12) */ or %g0,16,%o2
+/* 0x05b0 571 (12 14) */ fxnor %f14,%f2,%f2
+/* 0x05b4 572 (12 15) */ ldd [%i2+24],%f6
+/* 0x05b8 573 (12 13) */ or %g0,48,%o4
+/* 0x05bc 574 (13 16) */ fitod %f8,%f12
+/* 0x05c0 575 (13 14) */ or %g0,24,%o1
+/* 0x05c4 576 (13 14) */ or %g0,3,%g3
+/* 0x05c8 577 (14 17) */ fitod %f2,%f0
+/* 0x05cc 578 (15 18) */ fitod %f3,%f20
+/* 0x05d0 579 (15 18) */ ldd [%i2+32],%f2
+/* 0x05d4 580 (16 19) */ fitod %f9,%f10
+/* 0x05d8 581 (16 19) */ ldd [%i2+40],%f8
+/* 0x05dc 582 (17 20) */ fsubd %f18,%f0,%f0
+/* 0x05e0 583 (18 21) */ fsubd %f18,%f20,%f22
+/* 0x05e4 584 (19 22) */ fsubd %f18,%f12,%f20
+/* 0x05e8 585 (19 22) */ ldd [%i2+48],%f12
+/* 0x05ec 586 (20 23) */ fsubd %f18,%f10,%f10
+/* 0x05f0 587 (20 23) */ fmuld %f0,%f16,%f0
+/* 0x05f4 588 (21 23) */ fxnor %f14,%f4,%f4
+/* 0x05f8 589 (21 24) */ fmuld %f22,%f16,%f22
+/* 0x05fc 590 (22 24) */ fxnor %f14,%f6,%f6
+/* 0x0600 591 (22 25) */ fmuld %f20,%f16,%f20
+/* 0x0604 592 (23 26) */ fdtox %f0,%f0
+/* 0x0608 593 (23 24) */ std %f0,[%fp-217]
+/* 0x060c 594 (23 26) */ fmuld %f10,%f16,%f10
+/* 0x0610 595 (24 27) */ fdtox %f22,%f22
+/* 0x0614 596 (24 25) */ std %f22,[%fp-209]
+/* 0x0618 597 (25 28) */ fitod %f5,%f0
+/* 0x061c 598 (26 29) */ fdtox %f10,%f10
+/* 0x0620 599 (27 30) */ fdtox %f20,%f20
+/* 0x0624 600 (27 28) */ std %f20,[%fp-201]
+/* 0x0628 601 (28 31) */ fitod %f4,%f4
+/* 0x062c 602 (28 29) */ std %f10,[%fp-193]
+/* 0x0630 603 (29 31) */ fxnor %f14,%f2,%f10
+/* 0x0634 604 (30 33) */ fitod %f7,%f2
+/* 0x0638 605 (31 34) */ fsubd %f18,%f0,%f0
+/* 0x063c 606 (32 35) */ fsubd %f18,%f4,%f4
+/* 0x0640 607 (33 35) */ fxnor %f14,%f8,%f8
+
+!
+! ENTRY .L900000144
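+! (unrolled FP core of the v9 path: three packed pairs per pass, converted
+! and multiplied by the single multiplier double in %f16, with fdtox
+! results spilled through the indexed stores off %g4)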
+!
+
+ .L900000144: /* frequency 1.0 confidence 0.0 */
+/* 0x0644 609 ( 0 3) */ fitod %f11,%f22
+/* 0x0648 610 ( 0 1) */ add %o0,3,%o0
+/* 0x064c 611 ( 0 1) */ add %g3,6,%g3
+/* 0x0650 612 ( 0 3) */ fmuld %f0,%f16,%f0
+/* 0x0654 613 ( 1 4) */ fmuld %f4,%f16,%f24
+/* 0x0658 614 ( 1 2) */ subcc %o0,%o3,%g0
+/* 0x065c 615 ( 1 2) */ add %i0,6,%i0
+/* 0x0660 616 ( 1 4) */ fsubd %f18,%f2,%f2
+/* 0x0664 617 ( 2 5) */ fitod %f6,%f4
+/* 0x0668 618 ( 3 6) */ fdtox %f0,%f0
+/* 0x066c 619 ( 3 4) */ add %o4,8,%i1
+/* 0x0670 620 ( 4 7) */ ldd [%i2+%i1],%f20
+/* 0x0674 621 ( 4 7) */ fdtox %f24,%f6
+/* 0x0678 622 ( 4 5) */ add %o2,16,%o4
+/* 0x067c 623 ( 5 8) */ fsubd %f18,%f4,%f4
+/* 0x0680 624 ( 5 6) */ std %f6,[%o4+%g4]
+/* 0x0684 625 ( 5 6) */ add %o1,16,%o2
+/* 0x0688 626 ( 6 8) */ fxnor %f14,%f12,%f6
+/* 0x068c 627 ( 6 7) */ std %f0,[%o2+%g4]
+/* 0x0690 628 ( 7 10) */ fitod %f9,%f0
+/* 0x0694 629 ( 7 10) */ fmuld %f2,%f16,%f2
+/* 0x0698 630 ( 8 11) */ fmuld %f4,%f16,%f24
+/* 0x069c 631 ( 8 11) */ fsubd %f18,%f22,%f12
+/* 0x06a0 632 ( 9 12) */ fitod %f10,%f4
+/* 0x06a4 633 (10 13) */ fdtox %f2,%f2
+/* 0x06a8 634 (10 11) */ add %i1,8,%o1
+/* 0x06ac 635 (11 14) */ ldd [%i2+%o1],%f22
+/* 0x06b0 636 (11 14) */ fdtox %f24,%f10
+/* 0x06b4 637 (11 12) */ add %o4,16,%i4
+/* 0x06b8 638 (12 15) */ fsubd %f18,%f4,%f4
+/* 0x06bc 639 (12 13) */ std %f10,[%i4+%g4]
+/* 0x06c0 640 (12 13) */ add %o2,16,%i1
+/* 0x06c4 641 (13 15) */ fxnor %f14,%f20,%f10
+/* 0x06c8 642 (13 14) */ std %f2,[%i1+%g4]
+/* 0x06cc 643 (14 17) */ fitod %f7,%f2
+/* 0x06d0 644 (14 17) */ fmuld %f12,%f16,%f12
+/* 0x06d4 645 (15 18) */ fmuld %f4,%f16,%f24
+/* 0x06d8 646 (15 18) */ fsubd %f18,%f0,%f0
+/* 0x06dc 647 (16 19) */ fitod %f8,%f4
+/* 0x06e0 648 (17 20) */ fdtox %f12,%f20
+/* 0x06e4 649 (17 18) */ add %o1,8,%o4
+/* 0x06e8 650 (18 21) */ ldd [%i2+%o4],%f12
+/* 0x06ec 651 (18 21) */ fdtox %f24,%f8
+/* 0x06f0 652 (18 19) */ add %i4,16,%o2
+/* 0x06f4 653 (19 22) */ fsubd %f18,%f4,%f4
+/* 0x06f8 654 (19 20) */ std %f8,[%o2+%g4]
+/* 0x06fc 655 (19 20) */ add %i1,16,%o1
+/* 0x0700 656 (20 22) */ fxnor %f14,%f22,%f8
+/* 0x0704 657 (20 21) */ ble,pt %icc,.L900000144 ! tprob=0.50
+/* 0x0708 (20 21) */ std %f20,[%o1+%g4]
+
+!
+! ENTRY .L900000147
+!
+
+ .L900000147: /* frequency 1.0 confidence 0.0 */
+/* 0x070c 660 ( 0 3) */ fitod %f6,%f6
+/* 0x0710 661 ( 0 3) */ fmuld %f4,%f16,%f24
+/* 0x0714 662 ( 0 1) */ add %i4,32,%l4
+/* 0x0718 663 ( 1 4) */ fsubd %f18,%f2,%f2
+/* 0x071c 664 ( 1 4) */ fmuld %f0,%f16,%f22
+/* 0x0720 665 ( 1 2) */ add %i1,32,%l3
+/* 0x0724 666 ( 2 5) */ fitod %f10,%f28
+/* 0x0728 667 ( 2 3) */ sra %o0,0,%o2
+/* 0x072c 668 ( 2 3) */ add %i4,48,%l2
+/* 0x0730 669 ( 3 6) */ fsubd %f18,%f6,%f4
+/* 0x0734 670 ( 3 4) */ add %i1,48,%l1
+/* 0x0738 671 ( 3 4) */ add %i4,64,%l0
+/* 0x073c 672 ( 4 7) */ fitod %f11,%f26
+/* 0x0740 673 ( 4 5) */ sllx %o2,3,%o1
+/* 0x0744 674 ( 4 5) */ add %i1,64,%i5
+/* 0x0748 675 ( 5 8) */ fitod %f8,%f6
+/* 0x074c 676 ( 5 6) */ add %i4,80,%i4
+/* 0x0750 677 ( 5 6) */ add %i1,80,%i1
+/* 0x0754 678 ( 6 8) */ fxnor %f14,%f12,%f0
+/* 0x0758 679 ( 6 9) */ fmuld %f4,%f16,%f20
+/* 0x075c 680 ( 6 7) */ add %i4,16,%o4
+/* 0x0760 681 ( 7 10) */ fitod %f9,%f4
+/* 0x0764 682 ( 7 10) */ fmuld %f2,%f16,%f12
+/* 0x0768 683 ( 7 8) */ add %i1,16,%o3
+/* 0x076c 684 ( 8 11) */ fsubd %f18,%f28,%f10
+/* 0x0770 685 ( 8 9) */ subcc %o0,%g2,%g0
+/* 0x0774 686 ( 8 9) */ add %g3,12,%g3
+/* 0x0778 687 ( 9 12) */ fitod %f0,%f2
+/* 0x077c 688 (10 13) */ fsubd %f18,%f26,%f8
+/* 0x0780 689 (11 14) */ fitod %f1,%f0
+/* 0x0784 690 (11 14) */ fmuld %f10,%f16,%f10
+/* 0x0788 691 (12 15) */ fdtox %f24,%f24
+/* 0x078c 692 (12 13) */ std %f24,[%l4+%g4]
+/* 0x0790 693 (12 13) */ add %i0,12,%i0
+/* 0x0794 694 (13 16) */ fsubd %f18,%f6,%f6
+/* 0x0798 695 (13 16) */ fmuld %f8,%f16,%f8
+/* 0x079c 696 (14 17) */ fdtox %f22,%f22
+/* 0x07a0 697 (14 15) */ std %f22,[%l3+%g4]
+/* 0x07a4 698 (15 18) */ fsubd %f18,%f4,%f4
+/* 0x07a8 699 (16 19) */ fdtox %f20,%f20
+/* 0x07ac 700 (16 17) */ std %f20,[%l2+%g4]
+/* 0x07b0 701 (16 19) */ fmuld %f6,%f16,%f6
+/* 0x07b4 702 (17 20) */ fsubd %f18,%f2,%f2
+/* 0x07b8 703 (18 21) */ fsubd %f18,%f0,%f0
+/* 0x07bc 704 (18 21) */ fmuld %f4,%f16,%f4
+/* 0x07c0 705 (19 22) */ fdtox %f12,%f12
+/* 0x07c4 706 (19 20) */ std %f12,[%l1+%g4]
+/* 0x07c8 707 (20 23) */ fdtox %f10,%f10
+/* 0x07cc 708 (20 21) */ std %f10,[%l0+%g4]
+/* 0x07d0 709 (20 23) */ fmuld %f2,%f16,%f2
+/* 0x07d4 710 (21 24) */ fdtox %f8,%f8
+/* 0x07d8 711 (21 22) */ std %f8,[%i5+%g4]
+/* 0x07dc 712 (21 24) */ fmuld %f0,%f16,%f0
+/* 0x07e0 713 (22 25) */ fdtox %f6,%f6
+/* 0x07e4 714 (22 23) */ std %f6,[%i4+%g4]
+/* 0x07e8 715 (23 26) */ fdtox %f4,%f4
+/* 0x07ec 716 (23 24) */ std %f4,[%i1+%g4]
+/* 0x07f0 717 (24 27) */ fdtox %f2,%f2
+/* 0x07f4 718 (24 25) */ std %f2,[%o4+%g4]
+/* 0x07f8 719 (25 28) */ fdtox %f0,%f0
+/* 0x07fc 720 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50
+/* 0x0800 (25 26) */ std %f0,[%o3+%g4]
+
+!
+! ENTRY .L77000077
+!
+
+ .L77000077: /* frequency 1.0 confidence 0.0 */
+/* 0x0804 723 ( 0 3) */ ldd [%i2+%o1],%f0
+
+!
+! ENTRY .L900000155
+!
+
+ .L900000155: /* frequency 1.0 confidence 0.0 */
+/* 0x0808 725 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x080c 726 ( 0 1) */ sra %i0,0,%o1
+/* 0x0810 727 ( 0 1) */ add %o0,1,%o0
+/* 0x0814 728 ( 1 2) */ sllx %o1,3,%i4
+/* 0x0818 729 ( 1 2) */ add %i0,2,%i0
+/* 0x081c 730 ( 2 5) */ fitod %f0,%f2
+/* 0x0820 731 ( 2 3) */ sra %g3,0,%o1
+/* 0x0824 732 ( 2 3) */ add %g3,2,%g3
+/* 0x0828 733 ( 3 6) */ fitod %f1,%f0
+/* 0x082c 734 ( 3 4) */ sllx %o1,3,%i1
+/* 0x0830 735 ( 3 4) */ subcc %o0,%g2,%g0
+/* 0x0834 736 ( 4 5) */ sra %o0,0,%o2
+/* 0x0838 737 ( 5 8) */ fsubd %f18,%f2,%f2
+/* 0x083c 738 ( 5 6) */ sllx %o2,3,%o1
+/* 0x0840 739 ( 6 9) */ fsubd %f18,%f0,%f0
+/* 0x0844 740 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x0848 741 ( 9 12) */ fmuld %f0,%f16,%f0
+/* 0x084c 742 (11 14) */ fdtox %f2,%f2
+/* 0x0850 743 (11 12) */ std %f2,[%i4+%g4]
+/* 0x0854 744 (12 15) */ fdtox %f0,%f0
+/* 0x0858 745 (12 13) */ std %f0,[%i1+%g4]
+/* 0x085c 746 (12 13) */ ble,a,pt %icc,.L900000155 ! tprob=0.50
+/* 0x0860 (14 17) */ ldd [%i2+%o1],%f0
+
+!
+! ENTRY .L77000043
+!
+
+ .L77000043: /* frequency 1.0 confidence 0.0 */
+/* 0x0864 754 ( 0 1) */ subcc %i3,0,%g0
+
+!
+! ENTRY .L900000156
+!
+
+ .L900000156: /* frequency 1.0 confidence 0.0 */
+/* 0x0868 756 ( 0 1) */ ble,a,pt %icc,.L77000061 ! tprob=0.50
+/* 0x086c ( 0 1) */ or %g0,%g5,%o3
+/* 0x0870 761 ( 0 2) */ ldx [%fp-209],%i1
+/* 0x0874 762 ( 1 2) */ sub %i3,1,%g3
+/* 0x0878 763 ( 1 2) */ or %g0,0,%i0
+/* 0x087c 764 ( 2 3) */ subcc %i3,5,%g0
+/* 0x0880 765 ( 2 3) */ bl,pn %icc,.L77000078 ! tprob=0.50
+/* 0x0884 ( 2 4) */ ldx [%fp-217],%i2
+/* 0x0888 767 ( 3 6) */ ld [%o5],%i3
+/* 0x088c 768 ( 3 4) */ or %g0,8,%g2
+/* 0x0890 769 ( 3 4) */ or %g0,16,%o4
+/* 0x0894 770 ( 4 5) */ sub %g3,1,%o3
+/* 0x0898 771 ( 4 5) */ or %g0,3,%i0
+/* 0x089c 772 ( 5 6) */ add %i2,%i3,%o1
+/* 0x08a0 773 ( 5 8) */ ld [%o5+4],%i2
+/* 0x08a4 774 ( 6 7) */ st %o1,[%o7]
+/* 0x08a8 775 ( 6 7) */ srax %o1,32,%o1
+/* 0x08ac 776 ( 7 9) */ ldx [%fp-201],%o2
+/* 0x08b0 777 ( 7 8) */ add %i1,%i2,%o0
+/* 0x08b4 778 ( 7 8) */ or %g0,%o1,%i1
+/* 0x08b8 779 ( 8 11) */ ld [%o5+8],%o1
+/* 0x08bc 780 ( 8 9) */ add %o0,%i1,%o0
+/* 0x08c0 781 ( 9 10) */ st %o0,[%o7+4]
+/* 0x08c4 782 ( 9 10) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000140
+!
+
+ .L900000140: /* frequency 1.0 confidence 0.0 */
+/* 0x08c8 784 ( 0 1) */ add %g2,4,%i1
+/* 0x08cc 785 ( 0 1) */ add %o4,8,%o4
+/* 0x08d0 786 ( 1 3) */ ldx [%o4+%g4],%i2
+/* 0x08d4 787 ( 1 2) */ sra %o0,0,%g5
+/* 0x08d8 788 ( 1 2) */ add %o2,%o1,%o1
+/* 0x08dc 789 ( 2 5) */ ld [%o5+%i1],%o0
+/* 0x08e0 790 ( 2 3) */ add %o1,%g5,%o1
+/* 0x08e4 791 ( 2 3) */ add %i0,2,%i0
+/* 0x08e8 792 ( 3 4) */ st %o1,[%o7+%g2]
+/* 0x08ec 793 ( 3 4) */ srax %o1,32,%g5
+/* 0x08f0 794 ( 3 4) */ subcc %i0,%o3,%g0
+/* 0x08f4 795 ( 4 5) */ add %g2,8,%g2
+/* 0x08f8 796 ( 4 5) */ add %o4,8,%o4
+/* 0x08fc 797 ( 5 7) */ ldx [%o4+%g4],%o2
+/* 0x0900 798 ( 5 6) */ add %i2,%o0,%o0
+/* 0x0904 799 ( 6 9) */ ld [%o5+%g2],%o1
+/* 0x0908 800 ( 6 7) */ add %o0,%g5,%o0
+/* 0x090c 801 ( 7 8) */ st %o0,[%o7+%i1]
+/* 0x0910 802 ( 7 8) */ ble,pt %icc,.L900000140 ! tprob=0.50
+/* 0x0914 ( 7 8) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000143
+!
+
+ .L900000143: /* frequency 1.0 confidence 0.0 */
+/* 0x0918 805 ( 0 1) */ sra %o0,0,%o3
+/* 0x091c 806 ( 0 1) */ add %o2,%o1,%o0
+/* 0x0920 807 ( 1 2) */ add %o0,%o3,%o0
+/* 0x0924 808 ( 1 2) */ st %o0,[%o7+%g2]
+/* 0x0928 809 ( 1 2) */ subcc %i0,%g3,%g0
+/* 0x092c 810 ( 2 3) */ srax %o0,32,%g5
+/* 0x0930 811 ( 2 3) */ bg,a,pn %icc,.L77000061 ! tprob=0.50
+/* 0x0934 ( 3 4) */ or %g0,%g5,%o3
+
+!
+! ENTRY .L77000078
+!
+
+ .L77000078: /* frequency 1.0 confidence 0.0 */
+/* 0x0938 814 ( 0 1) */ sra %i0,0,%o0
+
+!
+! ENTRY .L900000154
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x093c 816 ( 0 1) */ sllx %o0,2,%g2
+/* 0x0940 817 ( 0 1) */ add %i0,1,%i0
+/* 0x0944 818 ( 1 2) */ sllx %o0,3,%o4
+/* 0x0948 819 ( 1 4) */ ld [%o5+%g2],%o2
+/* 0x094c 820 ( 1 2) */ subcc %i0,%g3,%g0
+/* 0x0950 821 ( 2 4) */ ldx [%o4+%g4],%o0
+/* 0x0954 822 ( 2 3) */ sra %g5,0,%o1
+/* 0x0958 823 ( 4 5) */ add %o0,%o2,%o0
+/* 0x095c 824 ( 5 6) */ add %o0,%o1,%o0
+/* 0x0960 825 ( 5 6) */ st %o0,[%o7+%g2]
+/* 0x0964 826 ( 6 7) */ srax %o0,32,%g5
+/* 0x0968 827 ( 6 7) */ ble,pt %icc,.L900000154 ! tprob=0.50
+/* 0x096c ( 7 8) */ sra %i0,0,%o0
+
+!
+! ENTRY .L77000047
+!
+
+ .L77000047: /* frequency 1.0 confidence 0.0 */
+/* 0x0970 834 ( 0 1) */ or %g0,%g5,%o3
+
+!
+! ENTRY .L77000061
+!
+
+ .L77000061: /* frequency 1.0 confidence 0.0 */
+
+/* 0x0974 835 ( 1 2) */ srl %o3,0,%i0
+/* 0x0978 ( 2 4) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x097c ( 4 5) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+!
+
+ .L77000048: /* frequency 1.0 confidence 0.0 */
+/* 0x0980 844 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50
+/* 0x0984 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0988 854 ( 0 3) */ ldd [%o2],%f4
+/* 0x098c 855 ( 1 4) */ ldd [%o0],%f6
+/* 0x0990 856 ( 1 2) */ srl %i4,19,%g3
+/* 0x0994 857 ( 1 2) */ andn %i4,%g2,%g2
+/* 0x0998 858 ( 2 3) */ st %g3,[%sp+2351]
+/* 0x099c 859 ( 2 4) */ fxnor %f0,%f4,%f4
+/* 0x09a0 860 ( 3 4) */ st %g2,[%sp+2355]
+/* 0x09a4 861 ( 4 7) */ ldd [%o2+8],%f12
+/* 0x09a8 862 ( 4 7) */ fitod %f4,%f10
+/* 0x09ac 863 ( 5 8) */ ldd [%o0+8],%f16
+/* 0x09b0 864 ( 5 8) */ fitod %f5,%f4
+/* 0x09b4 865 ( 6 9) */ ldd [%o2+16],%f18
+/* 0x09b8 866 ( 6 8) */ fxnor %f0,%f12,%f12
+/* 0x09bc 867 ( 7 10) */ ld [%sp+2351],%f9
+/* 0x09c0 868 ( 7 10) */ fsubd %f16,%f10,%f10
+/* 0x09c4 869 ( 8 11) */ ld [%sp+2355],%f15
+/* 0x09c8 870 ( 8 11) */ fitod %f12,%f22
+/* 0x09cc 871 ( 9 12) */ ldd [%o2+24],%f20
+/* 0x09d0 872 ( 9 12) */ fitod %f13,%f12
+/* 0x09d4 876 (10 13) */ ld [%i1],%g2
+/* 0x09d8 877 (10 13) */ fsubd %f16,%f4,%f4
+/* 0x09dc 878 (11 14) */ ld [%i1+4],%g3
+/* 0x09e0 879 (11 14) */ fsubd %f16,%f22,%f22
+/* 0x09e4 880 (12 15) */ ld [%i1+8],%g4
+/* 0x09e8 881 (12 14) */ fxnor %f0,%f18,%f18
+/* 0x09ec 882 (13 16) */ ld [%i1+12],%g5
+/* 0x09f0 883 (13 16) */ fsubd %f16,%f12,%f12
+/* 0x09f4 884 (14 17) */ ld [%i1+16],%o0
+/* 0x09f8 885 (14 17) */ fitod %f18,%f26
+/* 0x09fc 886 (15 18) */ ld [%i1+20],%o1
+/* 0x0a00 887 (15 17) */ fxnor %f0,%f20,%f20
+/* 0x0a04 888 (16 19) */ ld [%i1+24],%o2
+/* 0x0a08 889 (17 20) */ ld [%i1+28],%o3
+/* 0x0a0c 890 (19 20) */ fmovs %f6,%f8
+/* 0x0a10 891 (20 21) */ fmovs %f6,%f14
+/* 0x0a14 892 (22 25) */ fsubd %f8,%f6,%f8
+/* 0x0a18 893 (23 26) */ fsubd %f14,%f6,%f6
+/* 0x0a1c 894 (25 28) */ fmuld %f10,%f8,%f14
+/* 0x0a20 895 (26 29) */ fmuld %f10,%f6,%f10
+/* 0x0a24 896 (27 30) */ fmuld %f4,%f8,%f24
+/* 0x0a28 897 (28 31) */ fdtox %f14,%f14
+/* 0x0a2c 898 (28 29) */ std %f14,[%sp+2335]
+/* 0x0a30 899 (28 31) */ fmuld %f22,%f8,%f28
+/* 0x0a34 900 (29 32) */ fitod %f19,%f14
+/* 0x0a38 901 (29 32) */ fmuld %f22,%f6,%f18
+/* 0x0a3c 902 (30 33) */ fdtox %f10,%f10
+/* 0x0a40 903 (30 31) */ std %f10,[%sp+2343]
+/* 0x0a44 904 (30 33) */ fmuld %f4,%f6,%f4
+/* 0x0a48 905 (31 34) */ fmuld %f12,%f8,%f22
+/* 0x0a4c 906 (32 35) */ fdtox %f18,%f18
+/* 0x0a50 907 (32 33) */ std %f18,[%sp+2311]
+/* 0x0a54 908 (32 35) */ fmuld %f12,%f6,%f10
+/* 0x0a58 909 (33 35) */ ldx [%sp+2335],%o4
+/* 0x0a5c 910 (33 36) */ fdtox %f24,%f12
+/* 0x0a60 911 (34 35) */ std %f12,[%sp+2319]
+/* 0x0a64 912 (34 37) */ fsubd %f16,%f26,%f12
+/* 0x0a68 913 (35 37) */ ldx [%sp+2343],%o5
+/* 0x0a6c 914 (35 36) */ sllx %o4,19,%o4
+/* 0x0a70 915 (35 38) */ fdtox %f4,%f4
+/* 0x0a74 916 (36 37) */ std %f4,[%sp+2327]
+/* 0x0a78 917 (36 39) */ fdtox %f28,%f24
+/* 0x0a7c 918 (37 38) */ std %f24,[%sp+2303]
+/* 0x0a80 919 (37 40) */ fitod %f20,%f4
+/* 0x0a84 920 (37 38) */ add %o5,%o4,%o4
+/* 0x0a88 921 (37 40) */ fmuld %f12,%f8,%f24
+/* 0x0a8c 922 (38 40) */ ldx [%sp+2319],%o7
+/* 0x0a90 923 (38 41) */ fsubd %f16,%f14,%f14
+/* 0x0a94 924 (38 39) */ add %o4,%g2,%o4
+/* 0x0a98 925 (38 41) */ fmuld %f12,%f6,%f12
+/* 0x0a9c 926 (39 41) */ ldx [%sp+2327],%o5
+/* 0x0aa0 927 (39 42) */ fitod %f21,%f18
+/* 0x0aa4 928 (40 41) */ st %o4,[%i0]
+/* 0x0aa8 929 (40 41) */ sllx %o7,19,%o7
+/* 0x0aac 930 (40 43) */ fdtox %f22,%f20
+/* 0x0ab0 931 (41 42) */ std %f20,[%sp+2287]
+/* 0x0ab4 932 (41 44) */ fdtox %f10,%f10
+/* 0x0ab8 933 (41 42) */ add %o5,%o7,%o5
+/* 0x0abc 934 (41 44) */ fmuld %f14,%f8,%f20
+/* 0x0ac0 935 (42 43) */ std %f10,[%sp+2295]
+/* 0x0ac4 936 (42 43) */ srlx %o4,32,%o7
+/* 0x0ac8 937 (42 45) */ fsubd %f16,%f4,%f4
+/* 0x0acc 938 (42 45) */ fmuld %f14,%f6,%f14
+/* 0x0ad0 939 (43 45) */ ldx [%sp+2311],%g2
+/* 0x0ad4 940 (43 46) */ fdtox %f24,%f10
+/* 0x0ad8 941 (43 44) */ add %o5,%g3,%g3
+/* 0x0adc 942 (44 45) */ std %f10,[%sp+2271]
+/* 0x0ae0 943 (44 45) */ add %g3,%o7,%g3
+/* 0x0ae4 944 (44 47) */ fdtox %f12,%f12
+/* 0x0ae8 945 (45 47) */ ldx [%sp+2303],%l0
+/* 0x0aec 946 (45 48) */ fsubd %f16,%f18,%f10
+/* 0x0af0 947 (45 48) */ fmuld %f4,%f8,%f16
+/* 0x0af4 948 (46 47) */ std %f12,[%sp+2279]
+/* 0x0af8 949 (46 49) */ fdtox %f20,%f12
+/* 0x0afc 950 (46 49) */ fmuld %f4,%f6,%f4
+/* 0x0b00 951 (47 48) */ std %f12,[%sp+2255]
+/* 0x0b04 952 (47 48) */ sllx %l0,19,%l0
+/* 0x0b08 953 (47 50) */ fdtox %f14,%f12
+/* 0x0b0c 954 (48 50) */ ldx [%sp+2287],%o5
+/* 0x0b10 955 (48 49) */ add %g2,%l0,%g2
+/* 0x0b14 956 (48 51) */ fmuld %f10,%f8,%f8
+/* 0x0b18 957 (49 51) */ ldx [%sp+2295],%l1
+/* 0x0b1c 958 (49 50) */ srlx %g3,32,%l0
+/* 0x0b20 959 (49 50) */ add %g2,%g4,%g4
+/* 0x0b24 960 (49 52) */ fmuld %f10,%f6,%f6
+/* 0x0b28 961 (50 51) */ std %f12,[%sp+2263]
+/* 0x0b2c 962 (50 51) */ sllx %o5,19,%g2
+/* 0x0b30 963 (50 51) */ add %g4,%l0,%g4
+/* 0x0b34 964 (51 53) */ ldx [%sp+2279],%l0
+/* 0x0b38 965 (51 52) */ srlx %g4,32,%o5
+/* 0x0b3c 966 (51 52) */ add %l1,%g2,%g2
+/* 0x0b40 967 (52 53) */ st %g3,[%i0+4]
+/* 0x0b44 968 (52 53) */ add %g2,%g5,%g2
+/* 0x0b48 969 (52 55) */ fdtox %f16,%f10
+/* 0x0b4c 970 (53 55) */ ldx [%sp+2271],%o7
+/* 0x0b50 971 (53 54) */ add %g2,%o5,%g2
+/* 0x0b54 972 (53 56) */ fdtox %f4,%f4
+/* 0x0b58 973 (54 55) */ std %f10,[%sp+2239]
+/* 0x0b5c 974 (55 56) */ sllx %o7,19,%o7
+/* 0x0b60 975 (55 56) */ std %f4,[%sp+2247]
+/* 0x0b64 976 (55 58) */ fdtox %f8,%f4
+/* 0x0b68 977 (56 57) */ add %l0,%o7,%o7
+/* 0x0b6c 978 (56 58) */ ldx [%sp+2263],%o5
+/* 0x0b70 979 (57 58) */ add %o7,%o0,%o0
+/* 0x0b74 980 (57 58) */ std %f4,[%sp+2223]
+/* 0x0b78 981 (57 60) */ fdtox %f6,%f4
+/* 0x0b7c 982 (58 60) */ ldx [%sp+2255],%g5
+/* 0x0b80 983 (58 59) */ srlx %g2,32,%o7
+/* 0x0b84 984 (59 60) */ std %f4,[%sp+2231]
+/* 0x0b88 985 (59 60) */ add %o0,%o7,%o0
+/* 0x0b8c 986 (60 61) */ sllx %g5,19,%g5
+/* 0x0b90 987 (60 62) */ ldx [%sp+2247],%l1
+/* 0x0b94 988 (61 62) */ add %o5,%g5,%g5
+/* 0x0b98 989 (61 62) */ st %g2,[%i0+12]
+/* 0x0b9c 990 (62 64) */ ldx [%sp+2239],%l0
+/* 0x0ba0 991 (62 63) */ srlx %o0,32,%o4
+/* 0x0ba4 992 (62 63) */ add %g5,%o1,%o1
+/* 0x0ba8 993 (63 64) */ add %o1,%o4,%o1
+/* 0x0bac 994 (63 65) */ ldx [%sp+2223],%o7
+/* 0x0bb0 995 (64 65) */ sllx %l0,19,%g3
+/* 0x0bb4 996 (64 66) */ ldx [%sp+2231],%o5
+/* 0x0bb8 997 (65 66) */ add %l1,%g3,%o4
+/* 0x0bbc 998 (65 66) */ st %o0,[%i0+16]
+/* 0x0bc0 999 (66 67) */ add %o4,%o2,%o2
+/* 0x0bc4 1000 (66 67) */ st %o1,[%i0+20]
+/* 0x0bc8 1001 (67 68) */ srlx %o1,32,%o4
+/* 0x0bcc 1002 (67 68) */ st %g4,[%i0+8]
+/* 0x0bd0 1003 (68 69) */ sllx %o7,19,%g2
+/* 0x0bd4 1004 (68 69) */ add %o2,%o4,%o4
+/* 0x0bd8 1005 (68 69) */ st %o4,[%i0+24]
+/* 0x0bdc 1006 (69 70) */ add %o5,%g2,%g2
+/* 0x0be0 1007 (70 71) */ srlx %o4,32,%g3
+/* 0x0be4 1008 (70 71) */ add %g2,%o3,%g2
+/* 0x0be8 1009 (71 72) */ add %g2,%g3,%g2
+/* 0x0bec 1010 (71 72) */ st %g2,[%i0+28]
+/* 0x0bf0 1014 (72 73) */ srlx %g2,32,%o3
+/* 0x0bf4 1015 (73 74) */ srl %o3,0,%i0
+/* 0x0bf8 (74 76) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0bfc (76 77) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+!
+
+ .L77000050: /* frequency 1.0 confidence 0.0 */
+/* 0x0c00 1022 ( 0 1) */ subcc %i3,16,%g0
+/* 0x0c04 1023 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50
+/* 0x0c08 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0c0c 1034 ( 1 4) */ ldd [%o2],%f4
+/* 0x0c10 1035 ( 1 2) */ andn %i4,%g2,%g2
+/* 0x0c14 1036 ( 2 3) */ st %g2,[%sp+2483]
+/* 0x0c18 1037 ( 2 3) */ srl %i4,19,%g2
+/* 0x0c1c 1038 ( 3 4) */ st %g2,[%sp+2479]
+/* 0x0c20 1039 ( 3 5) */ fxnor %f0,%f4,%f4
+/* 0x0c24 1040 ( 4 7) */ ldd [%o0],%f8
+/* 0x0c28 1041 ( 5 8) */ fitod %f4,%f10
+/* 0x0c2c 1042 ( 5 8) */ ldd [%o0+8],%f16
+/* 0x0c30 1043 ( 6 9) */ ldd [%o2+8],%f14
+/* 0x0c34 1044 ( 6 9) */ fitod %f5,%f4
+/* 0x0c38 1045 ( 7 10) */ ld [%sp+2483],%f13
+/* 0x0c3c 1046 ( 8 11) */ ld [%sp+2479],%f7
+/* 0x0c40 1047 ( 8 11) */ fsubd %f16,%f10,%f10
+/* 0x0c44 1048 ( 9 11) */ fxnor %f0,%f14,%f14
+/* 0x0c48 1049 (10 13) */ fsubd %f16,%f4,%f4
+/* 0x0c4c 1050 (14 15) */ fmovs %f8,%f12
+/* 0x0c50 1051 (15 16) */ fmovs %f8,%f6
+/* 0x0c54 1052 (17 20) */ fsubd %f12,%f8,%f12
+/* 0x0c58 1053 (18 21) */ fsubd %f6,%f8,%f6
+/* 0x0c5c 1054 (19 22) */ fitod %f14,%f8
+/* 0x0c60 1055 (20 23) */ fmuld %f10,%f12,%f18
+/* 0x0c64 1056 (20 23) */ fitod %f15,%f14
+/* 0x0c68 1057 (21 24) */ fmuld %f10,%f6,%f10
+/* 0x0c6c 1058 (22 25) */ fsubd %f16,%f8,%f8
+/* 0x0c70 1059 (22 25) */ fmuld %f4,%f12,%f20
+/* 0x0c74 1060 (23 26) */ fmuld %f4,%f6,%f4
+/* 0x0c78 1061 (23 26) */ fsubd %f16,%f14,%f14
+/* 0x0c7c 1062 (24 27) */ fdtox %f10,%f10
+/* 0x0c80 1063 (24 25) */ std %f10,[%sp+2463]
+/* 0x0c84 1064 (25 28) */ fmuld %f8,%f12,%f10
+/* 0x0c88 1065 (25 28) */ fdtox %f18,%f18
+/* 0x0c8c 1066 (25 26) */ std %f18,[%sp+2471]
+/* 0x0c90 1067 (26 29) */ fmuld %f8,%f6,%f8
+/* 0x0c94 1068 (26 29) */ fdtox %f4,%f4
+/* 0x0c98 1069 (26 27) */ std %f4,[%sp+2447]
+/* 0x0c9c 1070 (27 30) */ fmuld %f14,%f12,%f4
+/* 0x0ca0 1071 (27 30) */ fdtox %f20,%f18
+/* 0x0ca4 1072 (27 28) */ std %f18,[%sp+2455]
+/* 0x0ca8 1073 (28 31) */ fdtox %f10,%f10
+/* 0x0cac 1074 (28 29) */ std %f10,[%sp+2439]
+/* 0x0cb0 1075 (28 31) */ fmuld %f14,%f6,%f14
+/* 0x0cb4 1076 (29 32) */ fdtox %f8,%f8
+/* 0x0cb8 1077 (29 30) */ std %f8,[%sp+2431]
+/* 0x0cbc 1078 (30 33) */ ldd [%o2+16],%f10
+/* 0x0cc0 1079 (30 33) */ fdtox %f4,%f4
+/* 0x0cc4 1080 (31 34) */ ldd [%o2+24],%f8
+/* 0x0cc8 1081 (31 34) */ fdtox %f14,%f14
+/* 0x0ccc 1082 (32 33) */ std %f4,[%sp+2423]
+/* 0x0cd0 1083 (32 34) */ fxnor %f0,%f10,%f10
+/* 0x0cd4 1084 (33 35) */ fxnor %f0,%f8,%f4
+/* 0x0cd8 1085 (33 34) */ std %f14,[%sp+2415]
+/* 0x0cdc 1086 (34 37) */ fitod %f10,%f8
+/* 0x0ce0 1087 (35 38) */ fitod %f11,%f10
+/* 0x0ce4 1088 (36 39) */ fitod %f4,%f14
+/* 0x0ce8 1089 (37 40) */ fsubd %f16,%f8,%f8
+/* 0x0cec 1090 (38 41) */ fsubd %f16,%f10,%f10
+/* 0x0cf0 1091 (39 42) */ fsubd %f16,%f14,%f14
+/* 0x0cf4 1092 (40 43) */ fmuld %f8,%f12,%f18
+/* 0x0cf8 1093 (40 43) */ fitod %f5,%f4
+/* 0x0cfc 1094 (41 44) */ fmuld %f8,%f6,%f8
+/* 0x0d00 1095 (42 45) */ fmuld %f10,%f12,%f20
+/* 0x0d04 1096 (43 46) */ fmuld %f10,%f6,%f10
+/* 0x0d08 1097 (43 46) */ fsubd %f16,%f4,%f4
+/* 0x0d0c 1098 (44 47) */ fdtox %f8,%f8
+/* 0x0d10 1099 (44 45) */ std %f8,[%sp+2399]
+/* 0x0d14 1100 (45 48) */ fmuld %f14,%f12,%f8
+/* 0x0d18 1101 (45 48) */ fdtox %f18,%f18
+/* 0x0d1c 1102 (45 46) */ std %f18,[%sp+2407]
+/* 0x0d20 1103 (46 49) */ fdtox %f10,%f10
+/* 0x0d24 1104 (46 47) */ std %f10,[%sp+2383]
+/* 0x0d28 1105 (46 49) */ fmuld %f14,%f6,%f14
+/* 0x0d2c 1106 (47 50) */ fmuld %f4,%f12,%f10
+/* 0x0d30 1107 (47 50) */ fdtox %f20,%f18
+/* 0x0d34 1108 (47 48) */ std %f18,[%sp+2391]
+/* 0x0d38 1109 (48 51) */ fdtox %f8,%f8
+/* 0x0d3c 1110 (48 49) */ std %f8,[%sp+2375]
+/* 0x0d40 1111 (48 51) */ fmuld %f4,%f6,%f4
+/* 0x0d44 1112 (49 52) */ fdtox %f14,%f14
+/* 0x0d48 1113 (49 50) */ std %f14,[%sp+2367]
+/* 0x0d4c 1117 (50 53) */ ldd [%o2+32],%f8
+/* 0x0d50 1118 (50 53) */ fdtox %f10,%f10
+/* 0x0d54 1119 (51 54) */ fdtox %f4,%f4
+/* 0x0d58 1120 (51 52) */ std %f4,[%sp+2351]
+/* 0x0d5c 1121 (52 54) */ fxnor %f0,%f8,%f8
+/* 0x0d60 1122 (52 55) */ ldd [%o2+40],%f14
+/* 0x0d64 1123 (53 54) */ std %f10,[%sp+2359]
+/* 0x0d68 1124 (54 57) */ fitod %f8,%f4
+/* 0x0d6c 1125 (55 57) */ fxnor %f0,%f14,%f10
+/* 0x0d70 1126 (56 59) */ fitod %f9,%f8
+/* 0x0d74 1127 (57 60) */ fsubd %f16,%f4,%f4
+/* 0x0d78 1128 (58 61) */ fitod %f10,%f14
+/* 0x0d7c 1129 (59 62) */ fsubd %f16,%f8,%f8
+/* 0x0d80 1130 (60 63) */ fmuld %f4,%f12,%f18
+/* 0x0d84 1131 (60 63) */ fitod %f11,%f10
+/* 0x0d88 1132 (61 64) */ fmuld %f4,%f6,%f4
+/* 0x0d8c 1133 (61 64) */ fsubd %f16,%f14,%f14
+/* 0x0d90 1134 (62 65) */ fmuld %f8,%f12,%f20
+/* 0x0d94 1135 (63 66) */ fmuld %f8,%f6,%f8
+/* 0x0d98 1136 (63 66) */ fsubd %f16,%f10,%f10
+/* 0x0d9c 1137 (64 67) */ fdtox %f4,%f4
+/* 0x0da0 1138 (64 65) */ std %f4,[%sp+2335]
+/* 0x0da4 1139 (65 68) */ fmuld %f14,%f12,%f4
+/* 0x0da8 1140 (65 68) */ fdtox %f18,%f18
+/* 0x0dac 1141 (65 66) */ std %f18,[%sp+2343]
+/* 0x0db0 1142 (66 69) */ fdtox %f8,%f8
+/* 0x0db4 1143 (66 67) */ std %f8,[%sp+2319]
+/* 0x0db8 1144 (66 69) */ fmuld %f14,%f6,%f14
+/* 0x0dbc 1145 (67 70) */ fmuld %f10,%f12,%f8
+/* 0x0dc0 1146 (67 70) */ fdtox %f20,%f18
+/* 0x0dc4 1147 (67 68) */ std %f18,[%sp+2327]
+/* 0x0dc8 1148 (68 71) */ fdtox %f4,%f4
+/* 0x0dcc 1149 (68 69) */ std %f4,[%sp+2311]
+/* 0x0dd0 1150 (68 71) */ fmuld %f10,%f6,%f10
+/* 0x0dd4 1151 (69 72) */ fdtox %f14,%f14
+/* 0x0dd8 1152 (69 70) */ std %f14,[%sp+2303]
+/* 0x0ddc 1153 (70 73) */ ldd [%o2+48],%f4
+/* 0x0de0 1154 (70 73) */ fdtox %f8,%f8
+/* 0x0de4 1155 (71 74) */ fdtox %f10,%f10
+/* 0x0de8 1156 (71 72) */ std %f10,[%sp+2287]
+/* 0x0dec 1157 (72 74) */ fxnor %f0,%f4,%f4
+/* 0x0df0 1158 (72 75) */ ldd [%o2+56],%f14
+/* 0x0df4 1159 (73 74) */ std %f8,[%sp+2295]
+/* 0x0df8 1160 (74 77) */ fitod %f4,%f10
+/* 0x0dfc 1161 (75 78) */ fitod %f5,%f4
+/* 0x0e00 1162 (76 78) */ fxnor %f0,%f14,%f8
+/* 0x0e04 1163 (77 80) */ fsubd %f16,%f10,%f10
+/* 0x0e08 1164 (78 81) */ fsubd %f16,%f4,%f4
+/* 0x0e0c 1165 (79 82) */ fitod %f8,%f14
+/* 0x0e10 1166 (80 83) */ fmuld %f10,%f12,%f18
+/* 0x0e14 1167 (80 83) */ fitod %f9,%f8
+/* 0x0e18 1168 (81 84) */ fmuld %f10,%f6,%f10
+/* 0x0e1c 1169 (82 85) */ fmuld %f4,%f12,%f20
+/* 0x0e20 1170 (82 85) */ fsubd %f16,%f14,%f14
+/* 0x0e24 1171 (83 86) */ fdtox %f18,%f18
+/* 0x0e28 1172 (83 84) */ std %f18,[%sp+2279]
+/* 0x0e2c 1173 (83 86) */ fmuld %f4,%f6,%f4
+/* 0x0e30 1174 (84 87) */ fdtox %f10,%f10
+/* 0x0e34 1175 (84 85) */ std %f10,[%sp+2271]
+/* 0x0e38 1176 (85 88) */ fdtox %f20,%f10
+/* 0x0e3c 1177 (85 86) */ std %f10,[%sp+2263]
+/* 0x0e40 1178 (86 89) */ fdtox %f4,%f4
+/* 0x0e44 1179 (86 87) */ std %f4,[%sp+2255]
+/* 0x0e48 1180 (86 89) */ fmuld %f14,%f12,%f10
+/* 0x0e4c 1181 (87 90) */ fmuld %f14,%f6,%f4
+/* 0x0e50 1182 (89 92) */ fdtox %f10,%f10
+/* 0x0e54 1183 (89 90) */ std %f10,[%sp+2247]
+/* 0x0e58 1184 (90 93) */ fdtox %f4,%f4
+/* 0x0e5c 1185 (90 91) */ std %f4,[%sp+2239]
+/* 0x0e60 1189 (91 93) */ ldx [%sp+2463],%g2
+/* 0x0e64 1190 (91 94) */ fsubd %f16,%f8,%f4
+/* 0x0e68 1191 (92 94) */ ldx [%sp+2471],%g3
+/* 0x0e6c 1192 (93 96) */ ld [%i1],%g4
+/* 0x0e70 1193 (93 94) */ sllx %g2,19,%g2
+/* 0x0e74 1194 (94 96) */ ldx [%sp+2455],%g5
+/* 0x0e78 1195 (94 95) */ add %g3,%g2,%g2
+/* 0x0e7c 1196 (94 97) */ fmuld %f4,%f6,%f6
+/* 0x0e80 1197 (95 97) */ ldx [%sp+2447],%g3
+/* 0x0e84 1198 (95 96) */ add %g2,%g4,%g4
+/* 0x0e88 1199 (95 98) */ fmuld %f4,%f12,%f4
+/* 0x0e8c 1200 (96 97) */ st %g4,[%i0]
+/* 0x0e90 1201 (96 97) */ srlx %g4,32,%g4
+/* 0x0e94 1202 (97 100) */ ld [%i1+8],%o0
+/* 0x0e98 1203 (97 98) */ sllx %g3,19,%g2
+/* 0x0e9c 1204 (97 100) */ fdtox %f6,%f6
+/* 0x0ea0 1205 (98 101) */ ld [%i1+4],%g3
+/* 0x0ea4 1206 (98 99) */ add %g5,%g2,%g2
+/* 0x0ea8 1207 (98 101) */ fdtox %f4,%f4
+/* 0x0eac 1208 (99 101) */ ldx [%sp+2439],%g5
+/* 0x0eb0 1209 (100 103) */ ld [%i1+12],%o1
+/* 0x0eb4 1210 (100 101) */ add %g2,%g3,%g2
+/* 0x0eb8 1211 (101 103) */ ldx [%sp+2431],%g3
+/* 0x0ebc 1212 (101 102) */ add %g2,%g4,%g4
+/* 0x0ec0 1213 (102 103) */ st %g4,[%i0+4]
+/* 0x0ec4 1214 (103 104) */ std %f6,[%sp+2223]
+/* 0x0ec8 1215 (103 104) */ sllx %g3,19,%g2
+/* 0x0ecc 1216 (104 106) */ ldx [%sp+2423],%g3
+/* 0x0ed0 1217 (104 105) */ add %g5,%g2,%g2
+/* 0x0ed4 1218 (105 107) */ ldx [%sp+2415],%g5
+/* 0x0ed8 1219 (105 106) */ add %g2,%o0,%g2
+/* 0x0edc 1220 (106 107) */ std %f4,[%sp+2231]
+/* 0x0ee0 1221 (106 107) */ srlx %g4,32,%o0
+/* 0x0ee4 1222 (107 109) */ ldx [%sp+2407],%g4
+/* 0x0ee8 1223 (107 108) */ sllx %g5,19,%g5
+/* 0x0eec 1224 (107 108) */ add %g2,%o0,%g2
+/* 0x0ef0 1225 (108 109) */ st %g2,[%i0+8]
+/* 0x0ef4 1226 (108 109) */ srlx %g2,32,%o0
+/* 0x0ef8 1227 (108 109) */ add %g3,%g5,%g3
+/* 0x0efc 1228 (109 111) */ ldx [%sp+2399],%g5
+/* 0x0f00 1229 (109 110) */ add %g3,%o1,%g3
+/* 0x0f04 1230 (110 113) */ ld [%i1+16],%o1
+/* 0x0f08 1231 (110 111) */ add %g3,%o0,%g3
+/* 0x0f0c 1232 (111 112) */ st %g3,[%i0+12]
+/* 0x0f10 1233 (111 112) */ sllx %g5,19,%g5
+/* 0x0f14 1234 (112 113) */ srlx %g3,32,%o0
+/* 0x0f18 1235 (112 113) */ add %g4,%g5,%g2
+/* 0x0f1c 1236 (112 114) */ ldx [%sp+2383],%g5
+/* 0x0f20 1237 (113 115) */ ldx [%sp+2391],%g4
+/* 0x0f24 1238 (113 114) */ add %g2,%o1,%g2
+/* 0x0f28 1239 (114 117) */ ld [%i1+20],%o1
+/* 0x0f2c 1240 (114 115) */ sllx %g5,19,%g5
+/* 0x0f30 1241 (114 115) */ add %g2,%o0,%g2
+/* 0x0f34 1242 (115 116) */ st %g2,[%i0+16]
+/* 0x0f38 1243 (115 116) */ srlx %g2,32,%o0
+/* 0x0f3c 1244 (115 116) */ add %g4,%g5,%g3
+/* 0x0f40 1245 (116 118) */ ldx [%sp+2367],%g5
+/* 0x0f44 1246 (116 117) */ add %g3,%o1,%g3
+/* 0x0f48 1247 (117 119) */ ldx [%sp+2375],%g4
+/* 0x0f4c 1248 (117 118) */ add %g3,%o0,%g3
+/* 0x0f50 1249 (118 121) */ ld [%i1+24],%o1
+/* 0x0f54 1250 (118 119) */ sllx %g5,19,%g5
+/* 0x0f58 1251 (119 120) */ st %g3,[%i0+20]
+/* 0x0f5c 1252 (119 120) */ add %g4,%g5,%g2
+/* 0x0f60 1253 (120 122) */ ldx [%sp+2351],%g5
+/* 0x0f64 1254 (120 121) */ srlx %g3,32,%o0
+/* 0x0f68 1255 (120 121) */ add %g2,%o1,%g2
+/* 0x0f6c 1256 (121 123) */ ldx [%sp+2359],%g4
+/* 0x0f70 1257 (121 122) */ add %g2,%o0,%g2
+/* 0x0f74 1258 (122 125) */ ld [%i1+28],%o1
+/* 0x0f78 1259 (122 123) */ sllx %g5,19,%g5
+/* 0x0f7c 1260 (123 124) */ st %g2,[%i0+24]
+/* 0x0f80 1261 (123 124) */ add %g4,%g5,%g3
+/* 0x0f84 1265 (124 126) */ ldx [%sp+2335],%g5
+/* 0x0f88 1266 (124 125) */ srlx %g2,32,%o0
+/* 0x0f8c 1267 (124 125) */ add %g3,%o1,%g3
+/* 0x0f90 1268 (125 127) */ ldx [%sp+2343],%g4
+/* 0x0f94 1269 (125 126) */ add %g3,%o0,%g3
+/* 0x0f98 1270 (126 127) */ sllx %g5,19,%g5
+/* 0x0f9c 1271 (126 129) */ ld [%i1+32],%o1
+/* 0x0fa0 1272 (127 128) */ add %g4,%g5,%g2
+/* 0x0fa4 1273 (127 129) */ ldx [%sp+2319],%g5
+/* 0x0fa8 1274 (128 130) */ ldx [%sp+2327],%g4
+/* 0x0fac 1275 (128 129) */ srlx %g3,32,%o0
+/* 0x0fb0 1276 (128 129) */ add %g2,%o1,%g2
+/* 0x0fb4 1277 (129 130) */ st %g3,[%i0+28]
+/* 0x0fb8 1278 (129 130) */ sllx %g5,19,%g5
+/* 0x0fbc 1279 (129 130) */ add %g2,%o0,%g2
+/* 0x0fc0 1280 (130 133) */ ld [%i1+36],%o1
+/* 0x0fc4 1281 (130 131) */ add %g4,%g5,%g3
+/* 0x0fc8 1282 (131 133) */ ldx [%sp+2303],%g5
+/* 0x0fcc 1283 (131 132) */ srlx %g2,32,%o0
+/* 0x0fd0 1284 (132 134) */ ldx [%sp+2311],%g4
+/* 0x0fd4 1285 (132 133) */ add %g3,%o1,%g3
+/* 0x0fd8 1286 (133 134) */ sllx %g5,19,%g5
+/* 0x0fdc 1287 (133 134) */ st %g2,[%i0+32]
+/* 0x0fe0 1288 (133 134) */ add %g3,%o0,%g3
+/* 0x0fe4 1289 (134 135) */ add %g4,%g5,%g2
+/* 0x0fe8 1290 (134 136) */ ldx [%sp+2287],%g5
+/* 0x0fec 1291 (135 137) */ ldx [%sp+2295],%g4
+/* 0x0ff0 1292 (135 136) */ srlx %g3,32,%o0
+/* 0x0ff4 1293 (136 139) */ ld [%i1+40],%o1
+/* 0x0ff8 1294 (136 137) */ sllx %g5,19,%g5
+/* 0x0ffc 1295 (137 138) */ st %g3,[%i0+36]
+/* 0x1000 1296 (137 138) */ add %g4,%g5,%g3
+/* 0x1004 1297 (138 140) */ ldx [%sp+2271],%g5
+/* 0x1008 1298 (138 139) */ add %g2,%o1,%g2
+/* 0x100c 1299 (139 141) */ ldx [%sp+2279],%g4
+/* 0x1010 1300 (139 140) */ add %g2,%o0,%g2
+/* 0x1014 1301 (140 143) */ ld [%i1+44],%o1
+/* 0x1018 1302 (140 141) */ sllx %g5,19,%g5
+/* 0x101c 1303 (141 142) */ st %g2,[%i0+40]
+/* 0x1020 1304 (141 142) */ srlx %g2,32,%o0
+/* 0x1024 1305 (141 142) */ add %g4,%g5,%g2
+/* 0x1028 1306 (142 144) */ ldx [%sp+2255],%g5
+/* 0x102c 1307 (142 143) */ add %g3,%o1,%g3
+/* 0x1030 1308 (143 145) */ ldx [%sp+2263],%g4
+/* 0x1034 1309 (143 144) */ add %g3,%o0,%g3
+/* 0x1038 1310 (144 147) */ ld [%i1+48],%o1
+/* 0x103c 1311 (144 145) */ sllx %g5,19,%g5
+/* 0x1040 1312 (145 146) */ srlx %g3,32,%o0
+/* 0x1044 1313 (145 146) */ st %g3,[%i0+44]
+/* 0x1048 1314 (145 146) */ add %g4,%g5,%g3
+/* 0x104c 1315 (146 148) */ ldx [%sp+2239],%g5
+/* 0x1050 1316 (146 147) */ add %g2,%o1,%g2
+/* 0x1054 1317 (147 150) */ ld [%i1+52],%o1
+/* 0x1058 1318 (147 148) */ add %g2,%o0,%g2
+/* 0x105c 1319 (148 150) */ ldx [%sp+2247],%g4
+/* 0x1060 1320 (148 149) */ sllx %g5,19,%g5
+/* 0x1064 1321 (149 150) */ srlx %g2,32,%o0
+/* 0x1068 1322 (149 150) */ st %g2,[%i0+48]
+/* 0x106c 1323 (149 150) */ add %g3,%o1,%g3
+/* 0x1070 1324 (150 153) */ ld [%i1+56],%o1
+/* 0x1074 1325 (150 151) */ add %g4,%g5,%g2
+/* 0x1078 1326 (150 151) */ add %g3,%o0,%g3
+/* 0x107c 1327 (151 153) */ ldx [%sp+2223],%g5
+/* 0x1080 1328 (151 152) */ srlx %g3,32,%o0
+/* 0x1084 1329 (152 154) */ ldx [%sp+2231],%g4
+/* 0x1088 1330 (152 153) */ add %g2,%o1,%g2
+/* 0x108c 1331 (153 154) */ sllx %g5,19,%g5
+/* 0x1090 1332 (153 156) */ ld [%i1+60],%o1
+/* 0x1094 1333 (153 154) */ add %g2,%o0,%g2
+/* 0x1098 1334 (154 155) */ st %g3,[%i0+52]
+/* 0x109c 1335 (154 155) */ add %g4,%g5,%g3
+/* 0x10a0 1336 (155 156) */ st %g2,[%i0+56]
+/* 0x10a4 1337 (155 156) */ srlx %g2,32,%g2
+/* 0x10a8 1338 (155 156) */ add %g3,%o1,%g3
+/* 0x10ac 1339 (156 157) */ add %g3,%g2,%g2
+/* 0x10b0 1340 (156 157) */ st %g2,[%i0+60]
+/* 0x10b4 1344 (157 158) */ srlx %g2,32,%o3
+/* 0x10b8 1345 (158 159) */ srl %o3,0,%i0
+/* 0x10bc (159 161) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x10c0 (161 162) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+ .L77000073: /* frequency 1.0 confidence 0.0 */
+
+
+ or %g0, %i4, %o2
+ or %g0, %o0, %o1
+ or %g0, %i3, %o0
+
+!
+! ENTRY .L77000052
+!
+
+ .L77000052: /* frequency 1.0 confidence 0.0 */
+/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2
+/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+2227]
+/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3
+/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14
+/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2
+/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+2223]
+/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5
+/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2
+/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6
+/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1000),%g1
+/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2
+/* 0x1054 1337 ( 3 4) */ xor %g1,-625,%g1
+/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20
+/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3
+/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8
+/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3
+/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10
+/* 0x106c 1343 ( 5 7) */ ld [%sp+2227],%f9
+/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0
+/* 0x1074 1345 ( 6 8) */ ld [%sp+2223],%f11
+/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1000),%g1
+/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1
+/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18
+/* 0x1084 1349 ( 7 8) */ xor %g1,-617,%g1
+/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4
+/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16
+/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50
+/* 0x1094 ( 8 9) */ subcc %o0,0,%g0
+/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2
+/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1000),%g1
+/* 0x10a0 1356 (10 11) */ xor %g1,-609,%g1
+/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0
+/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7
+/* 0x10ac 1359 (11 12) */ sethi %hi(0x1000),%g1
+/* 0x10b0 1360 (12 13) */ xor %g1,-601,%g1
+/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4
+/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50
+/* 0x10bc (13 14) */ sub %o3,2,%o2
+/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2
+/* 0x10c4 1365 (14 15) */ add %o1,16,%g5
+/* 0x10c8 1366 (14 15) */ or %g0,4,%g4
+/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0
+/* 0x10d0 1368 (15 16) */ add %o1,8,%o1
+/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6
+/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4
+/* 0x10dc 1371 (16 17) */ add %o1,16,%o1
+/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12
+/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0
+/* 0x10e8 1374 (17 18) */ add %o1,8,%o1
+/* 0x10ec 1375 (18 21) */ fitod %f7,%f2
+/* 0x10f0 1376 (19 22) */ fitod %f6,%f6
+/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10
+/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8
+/* 0x1100 1380 (23 26) */ fitod %f13,%f4
+/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6
+/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+!
+
+ .L990000154: /* frequency 1.0 confidence 0.0 */
+/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24
+/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4
+/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4
+/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22
+/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26
+/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0
+/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7
+/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28
+/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6
+/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2
+/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3
+/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0
+/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4
+/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2
+/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12
+/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6
+/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96]
+/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96]
+/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2
+/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6
+/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96]
+/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1
+/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12
+/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4
+/* 0x116c 1408 (10 11) */ std %f0,[%o4-96]
+/* 0x1170 1409 (11 14) */ ldd [%o1],%f0
+/* 0x1174 1410 (11 14) */ fitod %f9,%f2
+/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28
+/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24
+/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22
+/* 0x1184 1414 (13 16) */ fdtox %f4,%f4
+/* 0x1188 1415 (14 17) */ fitod %f10,%f6
+/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10
+/* 0x1190 1417 (15 18) */ fdtox %f24,%f24
+/* 0x1194 1418 (16 19) */ fdtox %f22,%f22
+/* 0x1198 1419 (16 17) */ std %f24,[%g3-64]
+/* 0x119c 1420 (17 18) */ std %f22,[%g2-64]
+/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10
+/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6
+/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64]
+/* 0x11ac 1424 (18 19) */ add %o1,8,%o1
+/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10
+/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0
+/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64]
+/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22
+/* 0x11c0 1429 (20 23) */ fitod %f13,%f4
+/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26
+/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24
+/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0
+/* 0x11d4 1434 (23 26) */ fitod %f8,%f6
+/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8
+/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26
+/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24
+/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32]
+/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32]
+/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8
+/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6
+/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32]
+/* 0x11f8 1443 (27 28) */ add %o1,8,%o1
+/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8
+/* 0x1200 1445 (28 29) */ std %f0,[%o4-32]
+/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50
+/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+ .L990000157: /* frequency 1.0 confidence 0.0 */
+/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28
+/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24
+/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3
+/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12
+/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26
+/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2
+/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4
+/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22
+/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7
+/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6
+/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128]
+/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4
+/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2
+/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0
+/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6
+/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24
+/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10
+/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128]
+/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10
+/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128]
+/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26
+/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10
+/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22
+/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12
+/* 0x1270 1474 (10 13) */ fdtox %f0,%f0
+/* 0x1274 1475 (10 11) */ std %f0,[%o4-128]
+/* 0x1278 1476 (11 14) */ fitod %f8,%f4
+/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6
+/* 0x1280 1478 (12 15) */ fdtox %f26,%f0
+/* 0x1284 1479 (12 13) */ std %f0,[%g3-96]
+/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10
+/* 0x128c 1481 (13 16) */ fdtox %f2,%f2
+/* 0x1290 1482 (13 14) */ std %f2,[%g2-96]
+/* 0x1294 1483 (14 17) */ fitod %f9,%f0
+/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2
+/* 0x129c 1485 (15 18) */ fdtox %f24,%f8
+/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96]
+/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4
+/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8
+/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12
+/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96]
+/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0
+/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6
+/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64]
+/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10
+/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64]
+/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6
+/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2
+/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64]
+/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4
+/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2
+/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8
+/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64]
+/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6
+/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32]
+/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0
+/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4
+/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32]
+/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2
+/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32]
+/* 0x1300 1510 (26 29) */ fdtox %f0,%f0
+/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50
+/* 0x1308 (26 27) */ std %f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+ .L77000054: /* frequency 1.0 confidence 0.0 */
+/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0
+
+!
+! ENTRY .L990000161
+!
+
+ .L990000161: /* frequency 1.0 confidence 0.0 */
+/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4
+/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1
+/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0
+/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2
+/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0
+/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2
+/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0
+/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6
+/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4
+/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2
+/* 0x133c 1527 (11 14) */ fdtox %f6,%f6
+/* 0x1340 1528 (11 12) */ std %f6,[%g3]
+/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0
+/* 0x1348 1530 (12 15) */ fdtox %f4,%f4
+/* 0x134c 1531 (12 13) */ std %f4,[%g2]
+/* 0x1350 1532 (12 13) */ add %g2,32,%g2
+/* 0x1354 1533 (13 16) */ fdtox %f2,%f2
+/* 0x1358 1534 (13 14) */ std %f2,[%o7]
+/* 0x135c 1535 (13 14) */ add %o7,32,%o7
+/* 0x1360 1536 (14 17) */ fdtox %f0,%f0
+/* 0x1364 1537 (14 15) */ std %f0,[%o4]
+/* 0x1368 1538 (14 15) */ add %o4,32,%o4
+/* 0x136c 1539 (15 16) */ add %g3,32,%g3
+/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50
+/* 0x1374 (16 19) */ ldd [%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+ .L77000056: /* frequency 1.0 confidence 0.0 */
+/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+ .L990000162: /* frequency 1.0 confidence 0.0 */
+/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50
+/* 0x1380 ( 0 1) */ nop
+/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1000),%g1
+/* 0x1388 1556 ( 1 2) */ xor %g1,-625,%g1
+/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4
+/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5
+/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1000),%g1
+/* 0x1398 1560 ( 3 4) */ xor %g1,-617,%g1
+/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7
+/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2
+/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2
+/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3
+/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0
+/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50
+/* 0x13b4 ( 6 7) */ sethi %hi(0x1000),%g1
+/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2
+/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3
+/* 0x13c0 1570 ( 7 8) */ xor %g1,-585,%g1
+/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4
+/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2
+/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1000),%g1
+/* 0x13d0 1574 ( 9 10) */ xor %g1,-593,%g1
+/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2
+/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5
+/* 0x13dc 1577 (10 11) */ sethi %hi(0x1000),%g1
+/* 0x13e0 1578 (11 12) */ xor %g1,-617,%g1
+/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1
+/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1
+/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0
+/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1
+/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3
+/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0
+/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1
+/* 0x1400 1586 (16 17) */ add %g4,8,%g4
+/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3
+/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0
+/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2
+/* 0x1410 1590 (18 19) */ st %o0,[%g3-4]
+/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000142
+!
+
+ .L990000142: /* frequency 1.0 confidence 0.0 */
+/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2
+/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2
+/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3
+/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5
+/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1
+/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0
+/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2
+/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0
+/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1
+/* 0x143c 1602 ( 4 5) */ st %o1,[%g3]
+/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5
+/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0
+/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1
+/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0
+/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3
+/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2
+/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0
+/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3
+/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1
+/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0
+/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12]
+/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5
+/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4
+/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0
+/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1
+/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2
+/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3
+/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2
+/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1
+/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0
+/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2
+/* 0x1494 1624 (12 13) */ st %o2,[%g3-8]
+/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5
+/* 0x149c 1626 (12 13) */ add %g5,64,%g5
+/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2
+/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0
+/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1
+/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0
+/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3
+/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2
+/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0
+/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4]
+/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50
+/* 0x14c4 (16 17) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+ .L990000145: /* frequency 1.0 confidence 0.0 */
+/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3
+/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3
+/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2
+/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0
+/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0
+/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4]
+/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0
+/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50
+/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+ .L77000058: /* frequency 1.0 confidence 0.0 */
+/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+ .L990000160: /* frequency 1.0 confidence 0.0 */
+/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3
+/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0
+/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2
+/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1
+/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2
+/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2
+/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0
+/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5
+/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0
+/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4
+/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0
+/* 0x151c 1661 ( 4 5) */ st %o0,[%g3]
+/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0
+/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5
+/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3
+/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50
+/* 0x1530 ( 6 8) */ ldx [%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+ .L77770061: /* frequency 1.0 confidence 0.0 */
+/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+
+/* 0x124c 1476 ( 0 0) */ .type mul_add,2
+/* 0x124c 1477 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x124c 1480 ( 0 0) */ .align 8
+/* 0x1250 1486 ( 0 0) */ .global mul_add_inp
+
+!
+! ENTRY mul_add_inp
+!
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */
+/* 0x1250 1488 ( 0 1) */ save %sp,-176,%sp
+/* 0x1254 1500 ( 1 2) */ sra %i2,0,%o3
+/* 0x1258 1501 ( 1 2) */ or %g0,%i1,%o2
+/* 0x125c 1502 ( 2 3) */ or %g0,%i0,%o0
+/* 0x1260 1503 ( 2 3) */ or %g0,%i0,%o1
+/* 0x1264 1504 ( 3 5) */ call mul_add ! params = ! Result =
+/* 0x1268 ( 4 5) */ srl %i3,0,%o4
+/* 0x126c 1506 ( 5 6) */ srl %o0,0,%i0
+/* 0x1270 ( 6 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1274 ( 8 9) */ restore %g0,%g0,%g0
+/* 0x1278 1509 ( 0 0) */ .type mul_add_inp,2
+/* 0x1278 1510 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp)
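+
+! Note that mul_add_inp copies %i0 into both %o0 and %o1 before the
+! call above, so mul_add sees the same buffer as source and
+! destination: this entry point is the in-place variant of mul_add.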
+
+ .section ".data",#alloc,#write
+/* 0x1278 6 ( 0 0) */ .align 8
+
+!
+! ENTRY mask_cnst
+!
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */
+/* 0x1278 8 ( 0 0) */ .xword -9223372034707292160
+/* 0x1280 9 ( 0 0) */ .type mask_cnst,#object
+/* 0x1280 10 ( 0 0) */ .size mask_cnst,8
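+
+! -9223372034707292160 is 0x8000000080000000, the sign bit of each
+! packed 32-bit half. Since fxnor keeps bits where its mask is 1 and
+! complements the rest, xnor-ing against this constant preserves each
+! half's sign bit and inverts its low 31 bits; this appears to feed
+! the fxnor/fitod/fsubd conversion sequences above.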
+
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 0000000000..94e86eedb9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ Plo = asm("mulq %a0, %a1, %v0", a, b); \
+ Phi = asm("umulh %a0, %a1, %v0", a, b); \
+ }
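+
+/* Illustrative only: on a compiler that provides a 128-bit integer
+   type, the same high/low split that mulq/umulh computes could be
+   written as
+
+       unsigned __int128 p = (unsigned __int128)(a) * (b);
+       Plo = (mp_digit)p;
+       Phi = (mp_digit)(p >> 64);
+
+   assuming mp_digit is a 64-bit type, as it is on Alpha. */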
+
+/* This is empty for the loop in s_mpv_mul_d */
+#define CARRY_ADD
+
+#define ONE_MUL \
+ a_i = *a++; \
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+ a0b0 += carry; \
+ if (a0b0 < carry) \
+ ++a1b1; \
+ CARRY_ADD \
+ *c++ = a0b0; \
+ carry = a1b1;
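+
+/* In ONE_MUL, `if (a0b0 < carry)` is the standard unsigned-overflow
+   test: after the wrapping addition a0b0 += carry, the sum is smaller
+   than the addend exactly when the low word carried out, in which
+   case the high word a1b1 absorbs the carry. */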
+
+#define FOUR_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL
+
+#define SIXTEEN_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL
+
+#define THIRTYTWO_MUL \
+ SIXTEEN_MUL \
+ SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL
+
+#define EXPAND_256(CALL) \
+ mp_digit carry = 0; \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ if (a_len & 255) { \
+ if (a_len & 1) { \
+ ONE_MUL \
+ } \
+ if (a_len & 2) { \
+ ONE_MUL \
+ ONE_MUL \
+ } \
+ if (a_len & 4) { \
+ FOUR_MUL \
+ } \
+ if (a_len & 8) { \
+ FOUR_MUL \
+ FOUR_MUL \
+ } \
+ if (a_len & 16) { \
+ SIXTEEN_MUL \
+ } \
+ if (a_len & 32) { \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 64) { \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 128) { \
+ ONETWENTYEIGHT_MUL \
+ } \
+ a_len = a_len & (-256); \
+ } \
+ if (a_len >= 256) { \
+ carry = CALL(a, a_len, b, c, carry); \
+ c += a_len; \
+ }
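+
+/* EXPAND_256 peels the low eight bits of a_len off one power of two
+   at a time (1, 2, 4, ..., 128 multiplies), leaving a_len a multiple
+   of 256 for the fully unrolled CALL routine. A generic sketch of the
+   same dispatch shape, with a hypothetical step() standing in for the
+   unrolled macro bodies:
+
+       for (unsigned bit = 1; bit < 256; bit <<= 1)
+           if (a_len & bit)
+               step(bit);      // do `bit` iterations' worth of work
+       a_len &= ~(mp_size)255; // only a multiple of 256 remains
+*/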
+
+#define FUNC_NAME(NAME) \
+ mp_digit NAME(const mp_digit *a, \
+ mp_size a_len, \
+ mp_digit b, mp_digit *c, \
+ mp_digit carry)
+
+#define DECLARE_MUL_256(FNAME) \
+ FUNC_NAME(FNAME) \
+ { \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ while (a_len) { \
+ ONETWENTYEIGHT_MUL \
+ ONETWENTYEIGHT_MUL \
+ a_len -= 256; \
+ } \
+ return carry; \
+ }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow things down on
+   the (admittedly small) set of tests (i.e., timetest) used to
+   measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+ the routine that uses it; this helps locality somewhat */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND)
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+#else
+ EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+ *c = carry;
+}
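+
+/* Note: c must have room for a_len + 1 digits; the final carry is
+   written one digit past the a_len product digits. */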
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD \
+ a0b0 += a_i = *c; \
+ if (a0b0 < a_i) \
+ ++a1b1;
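+
+/* For the multiply-and-add variants, CARRY_ADD also folds the
+   existing output digit *c into the low product word (reusing a_i as
+   a temporary); the same unsigned compare detects the wrap. */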
+
+/* Need forward declaration so it can be instantiated between the
+ two routines that use it; this helps locality somewhat */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256)
+ *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
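+/* The final propagation loop in s_mpv_mul_d_add_prop below relies on
+   `carry = carry < c_i`: after the wrapping addition carry += c_i,
+   the sum is below c_i exactly when the addition carried out, so the
+   loop keeps walking up the digits until the carry dies out. */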
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256)
+ while (carry) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i;
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c
new file mode 100644
index 0000000000..461d40ab36
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mulsqr.c
@@ -0,0 +1,84 @@
+/*
+ * Test whether to include squaring code given the current settings
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
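+/* Typical use (illustrative; the numbers below are made up):
+
+       $ ./mulsqr 5000 1024
+       Multiply: 0.4170
+       Square: 0.3120
+       Speedup: 25.2%
+       Prefer: square
+
+   If squaring wins consistently, it is worth enabling the MP_SQUARE
+   configuration option for these settings. */
+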
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_SQUARE 1 /* make sure squaring code is included */
+
+#include "mpi.h"
+#include "mpprime.h"
+
+int
+main(int argc, char *argv[])
+{
+ int ntests, prec, ix;
+ unsigned int seed;
+ clock_t start, stop;
+ double multime, sqrtime;
+ mp_int a, c;
+
+ seed = (unsigned int)time(NULL);
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
+ return 1;
+ }
+
+ if ((ntests = abs(atoi(argv[1]))) == 0) {
+ fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
+ return 1;
+ }
+ if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
+ fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
+ CHAR_BIT);
+ return 1;
+ }
+
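+ /* Round the requested bit count up to a whole number of mp_digits. */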
+ prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;
+
+ mp_init_size(&a, prec);
+ mp_init_size(&c, 2 * prec);
+
+ /* Test multiplication by self */
+ srand(seed);
+ start = clock();
+ for (ix = 0; ix < ntests; ix++) {
+ mpp_random_size(&a, prec);
+ mp_mul(&a, &a, &c);
+ }
+ stop = clock();
+
+ multime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+ /* Test squaring */
+ srand(seed);
+ start = clock();
+ for (ix = 0; ix < ntests; ix++) {
+ mpp_random_size(&a, prec);
+ mp_sqr(&a, &c);
+ }
+ stop = clock();
+
+ sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+ printf("Multiply: %.4f\n", multime);
+ printf("Square: %.4f\n", sqrtime);
+ if (multime < sqrtime) {
+ printf("Speedup: %.1f%%\n", 100.0 * (1.0 - multime / sqrtime));
+ printf("Prefer: multiply\n");
+ } else {
+ printf("Speedup: %.1f%%\n", 100.0 * (1.0 - sqrtime / multime));
+ printf("Prefer: square\n");
+ }
+
+ mp_clear(&a);
+ mp_clear(&c);
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c
new file mode 100644
index 0000000000..3e64a2acaa
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/primes.c
@@ -0,0 +1,841 @@
+/*
+ * These tables of primes were generated using the 'sieve' program
+ * (sieve.c) and converted to this format with 'ptab.pl'.
+ *
+ * The 'small' table is just the first 128 primes. The 'large' table
+ * is a table of all the prime values that will fit into a single
+ * mp_digit (given the current size of an mp_digit, which is two bytes).
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
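+/* Illustrative only: callers typically run trial division against
+   this table; a hypothetical wrapper might look like
+
+       int divisible_by_small_prime(const mp_int *a)
+       {
+           int ix;
+           mp_digit r;
+           for (ix = 0; ix < prime_tab_size; ix++) {
+               if (mp_mod_d(a, prime_tab[ix], &r) == MP_OKAY && r == 0)
+                   return 1;
+           }
+           return 0;
+       }
+
+   where mp_mod_d is the single-digit modulus routine from mpi.h. */
+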
+#if SMALL_TABLE
+#define MP_PRIME_TAB_SIZE 128
+#else
+#define MP_PRIME_TAB_SIZE 6542
+#endif
+
+const int prime_tab_size = MP_PRIME_TAB_SIZE;
+const mp_digit prime_tab[] = {
+ 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+ 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+ 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+ 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+ 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+ 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+ 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+ 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+ 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+ 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+ 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+ 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+ 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+ 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+ 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+ 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+#if !SMALL_TABLE
+ 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+ 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+ 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+ 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+ 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+ 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+ 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+ 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+ 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+ 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+ 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+ 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+ 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+ 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+ 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+ 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
+ 0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D,
+ 0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3,
+ 0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713,
+ 0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755,
+ 0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D,
+ 0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3,
+ 0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815,
+ 0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851,
+ 0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B,
+ 0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB,
+ 0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907,
+ 0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 0x0949,
+ 0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977,
+ 0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7,
+ 0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13,
+ 0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61,
+ 0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85,
+ 0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 0x0AB5,
+ 0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 0x0AED, 0x0AF1,
+ 0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D,
+ 0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B,
+ 0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3,
+ 0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07,
+ 0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F,
+ 0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95,
+ 0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5,
+ 0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13,
+ 0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55,
+ 0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3,
+ 0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5,
+ 0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17,
+ 0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57,
+ 0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F,
+ 0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5,
+ 0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17,
+ 0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53,
+ 0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3,
+ 0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9,
+ 0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021,
+ 0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073,
+ 0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3,
+ 0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1,
+ 0x10F3, 0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139,
+ 0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181,
+ 0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3,
+ 0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D,
+ 0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241,
+ 0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F,
+ 0x1297, 0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD,
+ 0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D,
+ 0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367,
+ 0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393,
+ 0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF,
+ 0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F,
+ 0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471,
+ 0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD,
+ 0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517,
+ 0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543,
+ 0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583,
+ 0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5,
+ 0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615,
+ 0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F,
+ 0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F,
+ 0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9,
+ 0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709,
+ 0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777,
+ 0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9,
+ 0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5,
+ 0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843,
+ 0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F,
+ 0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9,
+ 0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB,
+ 0x18F5, 0x18FD, 0x1915, 0x191B, 0x1931, 0x1933, 0x1945, 0x1949,
+ 0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3,
+ 0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED,
+ 0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D,
+ 0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B,
+ 0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1,
+ 0x1AB9, 0x1AC9, 0x1ACF, 0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB,
+ 0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B,
+ 0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73,
+ 0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1,
+ 0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27,
+ 0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55,
+ 0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5,
+ 0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21,
+ 0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D,
+ 0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89,
+ 0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5,
+ 0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B,
+ 0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D,
+ 0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD,
+ 0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF,
+ 0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49,
+ 0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97,
+ 0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1,
+ 0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027,
+ 0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063,
+ 0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1,
+ 0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB,
+ 0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159,
+ 0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1,
+ 0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 0x21D7, 0x21DD, 0x21E5,
+ 0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B,
+ 0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263,
+ 0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D,
+ 0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED,
+ 0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333,
+ 0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 0x2363, 0x236B, 0x2383,
+ 0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9,
+ 0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B,
+ 0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F,
+ 0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1,
+ 0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9,
+ 0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513,
+ 0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573,
+ 0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1,
+ 0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605,
+ 0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F,
+ 0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681,
+ 0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9,
+ 0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735,
+ 0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773,
+ 0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3,
+ 0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807,
+ 0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F,
+ 0x2849, 0x2851, 0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881,
+ 0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB,
+ 0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923,
+ 0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983,
+ 0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3,
+ 0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13,
+ 0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 0x2A6B,
+ 0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9,
+ 0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB,
+ 0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F,
+ 0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99,
+ 0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3,
+ 0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 0x2C35,
+ 0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87,
+ 0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB,
+ 0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F,
+ 0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89,
+ 0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3,
+ 0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25,
+ 0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F,
+ 0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5,
+ 0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB,
+ 0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45,
+ 0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81,
+ 0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB,
+ 0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023,
+ 0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071,
+ 0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9,
+ 0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF,
+ 0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D,
+ 0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D,
+ 0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3,
+ 0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215,
+ 0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B,
+ 0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7,
+ 0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9,
+ 0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335,
+ 0x3341, 0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379,
+ 0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB,
+ 0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419,
+ 0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D,
+ 0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB,
+ 0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D,
+ 0x352D, 0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571,
+ 0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B,
+ 0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7,
+ 0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637,
+ 0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D,
+ 0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1,
+ 0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F,
+ 0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D,
+ 0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3,
+ 0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847,
+ 0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D,
+ 0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5,
+ 0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923,
+ 0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B,
+ 0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1,
+ 0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5,
+ 0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27,
+ 0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D,
+ 0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1,
+ 0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21,
+ 0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71,
+ 0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD,
+ 0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3,
+ 0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29,
+ 0x3C35, 0x3C43, 0x3C4F, 0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71,
+ 0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1,
+ 0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B,
+ 0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F,
+ 0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D,
+ 0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3,
+ 0x3E05, 0x3E09, 0x3E0F, 0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F,
+ 0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87,
+ 0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7,
+ 0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37,
+ 0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79,
+ 0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD,
+ 0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B,
+ 0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D,
+ 0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB,
+ 0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115,
+ 0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B,
+ 0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB,
+ 0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F,
+ 0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261,
+ 0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D,
+ 0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307,
+ 0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F,
+ 0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF,
+ 0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED,
+ 0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B,
+ 0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F,
+ 0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF,
+ 0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513,
+ 0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577,
+ 0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 0x45AF, 0x45BB, 0x45C7,
+ 0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609,
+ 0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645,
+ 0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D,
+ 0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF,
+ 0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F,
+ 0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 0x475D, 0x476F, 0x4771,
+ 0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF,
+ 0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807,
+ 0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855,
+ 0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 0x48A1,
+ 0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919,
+ 0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969,
+ 0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1,
+ 0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41,
+ 0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89,
+ 0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF,
+ 0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B,
+ 0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD,
+ 0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF,
+ 0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D,
+ 0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55,
+ 0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93,
+ 0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05,
+ 0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51,
+ 0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B,
+ 0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9,
+ 0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35,
+ 0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85,
+ 0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5,
+ 0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 0x4F09,
+ 0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D,
+ 0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9,
+ 0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF,
+ 0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047,
+ 0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7,
+ 0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 0x510B,
+ 0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149,
+ 0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7,
+ 0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7,
+ 0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243,
+ 0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297,
+ 0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9,
+ 0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341,
+ 0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387,
+ 0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9,
+ 0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413,
+ 0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455,
+ 0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D,
+ 0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7,
+ 0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D,
+ 0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585,
+ 0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED,
+ 0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F,
+ 0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B,
+ 0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD,
+ 0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707,
+ 0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761,
+ 0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5,
+ 0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F,
+ 0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D,
+ 0x586F, 0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB,
+ 0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1,
+ 0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B,
+ 0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 0x5989, 0x5999, 0x599F,
+ 0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED,
+ 0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17,
+ 0x5A1F, 0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77,
+ 0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB,
+ 0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19,
+ 0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79,
+ 0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1,
+ 0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29,
+ 0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F,
+ 0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF,
+ 0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05,
+ 0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47,
+ 0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95,
+ 0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7,
+ 0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B,
+ 0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49,
+ 0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5,
+ 0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27,
+ 0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65,
+ 0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5,
+ 0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F,
+ 0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085,
+ 0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9,
+ 0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B,
+ 0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191,
+ 0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1,
+ 0x61FF, 0x6209, 0x6217, 0x621D, 0x6221, 0x6227, 0x623B, 0x6241,
+ 0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295,
+ 0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD,
+ 0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F,
+ 0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377,
+ 0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB,
+ 0x63EF, 0x63F5, 0x6401, 0x6403, 0x6409, 0x6415, 0x6421, 0x6427,
+ 0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475,
+ 0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7,
+ 0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521,
+ 0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F,
+ 0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD,
+ 0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607,
+ 0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B,
+ 0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B,
+ 0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5,
+ 0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F,
+ 0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791,
+ 0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809,
+ 0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B,
+ 0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871,
+ 0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7,
+ 0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917,
+ 0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B,
+ 0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB,
+ 0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07,
+ 0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B,
+ 0x6A87, 0x6A8D, 0x6A91, 0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1,
+ 0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39,
+ 0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93,
+ 0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 0x6BE1, 0x6BEF, 0x6BFF,
+ 0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59,
+ 0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83,
+ 0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3,
+ 0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25,
+ 0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61,
+ 0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 0x6DA5, 0x6DB1, 0x6DB7,
+ 0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15,
+ 0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B,
+ 0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3,
+ 0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F,
+ 0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D,
+ 0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5,
+ 0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9,
+ 0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027,
+ 0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079,
+ 0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7,
+ 0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121,
+ 0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183,
+ 0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB,
+ 0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219,
+ 0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277,
+ 0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB,
+ 0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303,
+ 0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F,
+ 0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1,
+ 0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D,
+ 0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D,
+ 0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1,
+ 0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D,
+ 0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 0x759D,
+ 0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5,
+ 0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633,
+ 0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685,
+ 0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1,
+ 0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741,
+ 0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 0x77AD,
+ 0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF,
+ 0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853,
+ 0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B,
+ 0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD,
+ 0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F,
+ 0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993,
+ 0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF,
+ 0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11,
+ 0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59,
+ 0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1,
+ 0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D,
+ 0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71,
+ 0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7,
+ 0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19,
+ 0x7C1B, 0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81,
+ 0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7,
+ 0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F,
+ 0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D,
+ 0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5,
+ 0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F,
+ 0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B,
+ 0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7,
+ 0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7,
+ 0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D,
+ 0x7F43, 0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87,
+ 0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED,
+ 0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F,
+ 0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095,
+ 0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB,
+ 0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D,
+ 0x812F, 0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D,
+ 0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB,
+ 0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231,
+ 0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F,
+ 0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7,
+ 0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B,
+ 0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D,
+ 0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F,
+ 0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7,
+ 0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425,
+ 0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483,
+ 0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF,
+ 0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D,
+ 0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9,
+ 0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9,
+ 0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639,
+ 0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699,
+ 0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1,
+ 0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B,
+ 0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777,
+ 0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5,
+ 0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F,
+ 0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B,
+ 0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED,
+ 0x88F3, 0x88FD, 0x8909, 0x890B, 0x8911, 0x891B, 0x8923, 0x8927,
+ 0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981,
+ 0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB,
+ 0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35,
+ 0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79,
+ 0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD,
+ 0x8AD1, 0x8AD7, 0x8AF1, 0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13,
+ 0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF,
+ 0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF,
+ 0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53,
+ 0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F,
+ 0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9,
+ 0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41,
+ 0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7,
+ 0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5,
+ 0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69,
+ 0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF,
+ 0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB,
+ 0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45,
+ 0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D,
+ 0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9,
+ 0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023,
+ 0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053,
+ 0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF,
+ 0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113,
+ 0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167,
+ 0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9,
+ 0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215,
+ 0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275,
+ 0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD,
+ 0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 0x92D5, 0x92D7, 0x92E7,
+ 0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D,
+ 0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5,
+ 0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409,
+ 0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469,
+ 0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1,
+ 0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 0x9527, 0x952D, 0x9535,
+ 0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589,
+ 0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD,
+ 0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635,
+ 0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9,
+ 0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD,
+ 0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739,
+ 0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787,
+ 0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9,
+ 0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F,
+ 0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887,
+ 0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD,
+ 0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929,
+ 0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D,
+ 0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7,
+ 0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13,
+ 0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75,
+ 0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF,
+ 0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F,
+ 0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D,
+ 0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD,
+ 0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23,
+ 0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67,
+ 0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1,
+ 0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 0x9D15,
+ 0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97,
+ 0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1,
+ 0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47,
+ 0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95,
+ 0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05,
+ 0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 0x9F6D,
+ 0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 0x9F95, 0x9FA3,
+ 0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5,
+ 0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057,
+ 0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5,
+ 0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1,
+ 0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129,
+ 0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D,
+ 0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED,
+ 0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243,
+ 0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B,
+ 0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5,
+ 0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B,
+ 0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387,
+ 0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5,
+ 0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F,
+ 0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459,
+ 0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3,
+ 0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3,
+ 0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553,
+ 0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D,
+ 0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9,
+ 0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D,
+ 0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691,
+ 0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD,
+ 0xA6CF, 0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F,
+ 0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759,
+ 0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9,
+ 0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805,
+ 0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F,
+ 0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7,
+ 0xA8D7, 0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937,
+ 0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3,
+ 0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15,
+ 0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59,
+ 0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB,
+ 0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07,
+ 0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73,
+ 0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5,
+ 0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09,
+ 0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57,
+ 0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9,
+ 0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED,
+ 0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D,
+ 0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1,
+ 0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05,
+ 0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61,
+ 0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7,
+ 0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD,
+ 0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57,
+ 0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3,
+ 0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015,
+ 0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069,
+ 0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF,
+ 0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119,
+ 0xB11D, 0xB11F, 0xB131, 0xB141, 0xB14D, 0xB15B, 0xB165, 0xB173,
+ 0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD,
+ 0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D,
+ 0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281,
+ 0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9,
+ 0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D,
+ 0xB33F, 0xB345, 0xB377, 0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B,
+ 0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B,
+ 0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B,
+ 0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5,
+ 0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501,
+ 0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F,
+ 0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF,
+ 0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609,
+ 0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659,
+ 0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5,
+ 0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF,
+ 0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755,
+ 0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3,
+ 0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827,
+ 0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893,
+ 0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5,
+ 0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F,
+ 0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D,
+ 0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1,
+ 0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25,
+ 0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65,
+ 0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF,
+ 0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F,
+ 0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61,
+ 0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 0xBBB1, 0xBBC9, 0xBBCF,
+ 0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33,
+ 0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F,
+ 0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3,
+ 0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F,
+ 0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F,
+ 0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 0xBDCD, 0xBDD1, 0xBDE3,
+ 0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27,
+ 0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D,
+ 0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9,
+ 0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39,
+ 0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89,
+ 0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD,
+ 0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029,
+ 0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D,
+ 0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7,
+ 0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119,
+ 0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179,
+ 0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD,
+ 0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221,
+ 0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B,
+ 0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD,
+ 0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313,
+ 0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371,
+ 0xC37F, 0xC383, 0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5,
+ 0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF,
+ 0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461,
+ 0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7,
+ 0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509,
+ 0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F,
+ 0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 0xC5C3,
+ 0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641,
+ 0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1,
+ 0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1,
+ 0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757,
+ 0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD,
+ 0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 0xC803,
+ 0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857,
+ 0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7,
+ 0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF,
+ 0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937,
+ 0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989,
+ 0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9,
+ 0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37,
+ 0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F,
+ 0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9,
+ 0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29,
+ 0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87,
+ 0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7,
+ 0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B,
+ 0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91,
+ 0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15,
+ 0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57,
+ 0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB,
+ 0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD,
+ 0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65,
+ 0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7,
+ 0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07,
+ 0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61,
+ 0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9,
+ 0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7,
+ 0xD013, 0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F,
+ 0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB,
+ 0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D,
+ 0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171,
+ 0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD,
+ 0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217,
+ 0xD21F, 0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279,
+ 0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3,
+ 0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315,
+ 0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B,
+ 0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD,
+ 0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B,
+ 0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D,
+ 0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1,
+ 0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F,
+ 0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565,
+ 0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1,
+ 0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D,
+ 0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687,
+ 0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1,
+ 0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F,
+ 0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7,
+ 0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB,
+ 0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D,
+ 0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF,
+ 0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933,
+ 0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F,
+ 0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9,
+ 0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09,
+ 0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51,
+ 0xDA59, 0xDA5D, 0xDA5F, 0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D,
+ 0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5,
+ 0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43,
+ 0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD,
+ 0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D,
+ 0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F,
+ 0xDC75, 0xDC7B, 0xDC85, 0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9,
+ 0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF,
+ 0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53,
+ 0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B,
+ 0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9,
+ 0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D,
+ 0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77,
+ 0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1,
+ 0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F,
+ 0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67,
+ 0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5,
+ 0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005,
+ 0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071,
+ 0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7,
+ 0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135,
+ 0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F,
+ 0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1,
+ 0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219,
+ 0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279,
+ 0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD,
+ 0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327,
+ 0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363,
+ 0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9,
+ 0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419,
+ 0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 0xE449, 0xE453, 0xE455,
+ 0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD,
+ 0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537,
+ 0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585,
+ 0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615,
+ 0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657,
+ 0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 0xE68D, 0xE68F, 0xE695,
+ 0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3,
+ 0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F,
+ 0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B,
+ 0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 0xE7DF,
+ 0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B,
+ 0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869,
+ 0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB,
+ 0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915,
+ 0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963,
+ 0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7,
+ 0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D,
+ 0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3,
+ 0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB,
+ 0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B,
+ 0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1,
+ 0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B,
+ 0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B,
+ 0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD,
+ 0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F,
+ 0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B,
+ 0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3,
+ 0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19,
+ 0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81,
+ 0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 0xEEF1,
+ 0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71,
+ 0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5,
+ 0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B,
+ 0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071,
+ 0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF,
+ 0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 0xF0F7,
+ 0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175,
+ 0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7,
+ 0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227,
+ 0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277,
+ 0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF,
+ 0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319,
+ 0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B,
+ 0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409,
+ 0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455,
+ 0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9,
+ 0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B,
+ 0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B,
+ 0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF,
+ 0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D,
+ 0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F,
+ 0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB,
+ 0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755,
+ 0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D,
+ 0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7,
+ 0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D,
+ 0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877,
+ 0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7,
+ 0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9,
+ 0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941,
+ 0xF94F, 0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3,
+ 0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13,
+ 0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B,
+ 0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 0xFABD, 0xFAD9, 0xFADF,
+ 0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F,
+ 0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7,
+ 0xFBC3, 0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37,
+ 0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73,
+ 0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD,
+ 0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51,
+ 0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99,
+ 0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03,
+ 0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D,
+ 0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93,
+ 0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3,
+ 0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D,
+ 0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D,
+ 0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1
+#endif
+};
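+
+/*
+ * A hedged usage sketch (illustrative only -- the helper name below is
+ * hypothetical, not part of the MPI API): tables like this exist so that
+ * a candidate can be screened cheaply by trial division against small
+ * primes before any expensive probabilistic test (e.g. Miller-Rabin) is
+ * run.  Left inside #if 0 so it is never compiled.
+ */
+#if 0
+static int
+small_prime_filter_example(unsigned long n) /* assumes n >= 2 */
+{
+    int i;
+
+    for (i = 0; i < prime_tab_size; i++) {
+        unsigned long p = (unsigned long)prime_tab[i];
+
+        if (p * p > n)
+            return 1; /* no divisor up to sqrt(n): n is prime */
+        if (n % p == 0)
+            return n == p; /* small factor found: composite unless n == p */
+    }
+    return -1; /* table exhausted: fall back to a probabilistic test */
+}
+#endif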
diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il
new file mode 100644
index 0000000000..d2e8024ac2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_32.il
@@ -0,0 +1,1291 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! The interface to the VIS instructions as declared below (and in the VIS
+! User's Manual) will not change, but the macro implementation might change
+! in the future.
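+!
+! A usage sketch (an illustrative note, assuming the standard Sun cc
+! inline-template mechanism): when this .il file is named on the cc
+! command line, C code can call the routines below like ordinary
+! functions, e.g.
+!
+!     double sum = vis_fpadd16(a, b);
+!
+! and the compiler splices the template body into the call site.  The
+! recurring st/std and ld/ldd pairs through the stack scratch slots only
+! move the integer argument registers (%o0..%o3) into floating-point
+! registers, since the VIS instructions operate on the FP register file.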
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+!
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8,8
+ edge8 %o0,%o1,%o0
+ .end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8l,8
+ edge8l %o0,%o1,%o0
+ .end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16,8
+ edge16 %o0,%o1,%o0
+ .end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16l,8
+ edge16l %o0,%o1,%o0
+ .end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32,8
+ edge32 %o0,%o1,%o0
+ .end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32l,8
+ edge32l %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
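+!
+! (Each template below ORs -1024 into the edge mask via a movgu
+! conditional move when the condition codes left by the edge instruction
+! indicate %o0 > %o1 unsigned, so the "cc set" case shows up as a
+! negative return value.)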
+!
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8cc,8
+ edge8 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8lcc,8
+ edge8l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16cc,8
+ edge16 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16lcc,8
+ edge16l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32cc,8
+ edge32 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32lcc,8
+ edge32l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Alignment instructions
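+!
+! (These are typically used as a pair: vis_alignaddr computes an 8-byte
+! aligned address and records the byte offset in the GSR, and
+! vis_faligndata then extracts 8 contiguous bytes from the concatenation
+! of two doubles at that offset.)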
+!
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddr,8
+ alignaddr %o0,%o1,%o0
+ .end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddrl,8
+ alignaddrl %o0,%o1,%o0
+ .end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_faligndata,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ faligndata %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
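+!
+! (Each comparison tests the 16- or 32-bit partitions of its two double
+! operands and returns a small bit mask with one bit per partition that
+! satisfies the predicate.)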
+!
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmple16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpne16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmple32 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpne32 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpgt16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpeq16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpgt32 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpeq32 %f4,%f10,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
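+!
+! (The fpadd/fpsub variants add or subtract each 16- or 32-bit partition
+! independently, with no carry between partitions; the "s" forms operate
+! on single-precision registers holding half as many partitions.)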
+!
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16,12
+ st %o0,[%sp+0x44]
+ ld [%sp+0x44],%f4
+ st %o1,[%sp+0x48]
+ st %o2,[%sp+0x4c]
+ ldd [%sp+0x48],%f10
+ fmul8x16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16_dummy,16
+ st %o0,[%sp+0x44]
+ ld [%sp+0x44],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fmul8x16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16au,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmul8x16au %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16al,8
+ st %o0,[%sp+0x44]
+ ld [%sp+0x44],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmul8x16al %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8sux16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fmul8sux16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8ulx16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fmul8ulx16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8sux16,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmuld8sux16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8ulx16,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmuld8ulx16 %f4,%f10,%f0
+ .end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd16,16
+ std %o0,[%sp+0x40]
+ ldd [%sp+0x40],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpadd16 %f4,%f10,%f0
+ .end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd16s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpadd16s %f4,%f10,%f0
+ .end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpadd32 %f4,%f10,%f0
+ .end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd32s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpadd32s %f4,%f10,%f0
+ .end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpsub16 %f4,%f10,%f0
+ .end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub16s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpsub16s %f4,%f10,%f0
+ .end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpsub32 %f4,%f10,%f0
+ .end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub32s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpsub32s %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel packing
+!
+! float vis_fpack16(double /*frs2*/);
+!
+ .inline vis_fpack16,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpack16 %f4,%f0
+ .end
+
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpack16_pair,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack16 %f4,%f0
+ fpack16 %f10,%f1
+ .end
+!
+! void vis_st2_fpack16(double, double, double *)
+!
+ .inline vis_st2_fpack16,20
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack16 %f4,%f0
+ fpack16 %f10,%f1
+ st %f0,[%o4+0]
+ st %f1,[%o4+4]
+ .end
+!
+! void vis_std_fpack16(double, double, double *)
+!
+ .inline vis_std_fpack16,20
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack16 %f4,%f0
+ fpack16 %f10,%f1
+ std %f0,[%o4]
+ .end
+!
+! void vis_st2_fpackfix(double, double, double *)
+!
+ .inline vis_st2_fpackfix,20
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpackfix %f4,%f0
+ fpackfix %f10,%f1
+ st %f0,[%o4+0]
+ st %f1,[%o4+4]
+ .end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack16_to_hi,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpack16 %f4,%f0
+ .end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack16_to_lo,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpack16 %f4,%f3
+ fmovs %f3,%f1 /* without this, optimizer goes wrong */
+ .end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack32 %f4,%f10,%f0
+ .end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+ .inline vis_fpackfix,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpackfix %f4,%f0
+ .end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpackfix_pair,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f6
+ fpackfix %f4,%f0
+ fpackfix %f6,%f1
+ .end
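+!
+! A usage sketch (pix is an illustrative double of four 16-bit lanes):
+! fpack16 shifts each lane by GSR.scale_factor and clamps it to an
+! unsigned byte, so set the GSR first (the scale factor occupies GSR
+! bits 6:3 per the VIS spec):
+!
+!     vis_write_gsr(3 << 3);            /* scale_factor = 3 */
+!     float bytes4 = vis_fpack16(pix);  /* 4 x 16 bits -> 4 x 8 bits */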
+
+!--------------------------------------------------------------------
+! Motion estimation
+!
+! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+!
+ .inline vis_pdist,24
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ pdist %f4,%f10,%f0
+ .end
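+!
+! A usage sketch (a, b, acc are illustrative): pdist accumulates the
+! sum of absolute differences over the eight byte lanes,
+!
+!     acc = vis_pdist(a, b, acc);  /* acc += |a[0]-b[0]| + ... + |a[7]-b[7]| */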
+
+!--------------------------------------------------------------------
+! Channel merging
+!
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpmerge,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpmerge %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+!
+! double vis_fexpand(float /*frs2*/);
+!
+ .inline vis_fexpand,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ fexpand %f4,%f0
+ .end
+
+! double vis_fexpand_hi(double /*frs2*/);
+!
+ .inline vis_fexpand_hi,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fexpand %f4,%f0
+ .end
+
+! double vis_fexpand_lo(double /*frs2*/);
+!
+ .inline vis_fexpand_lo,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fmovs %f5, %f2
+ fexpand %f2,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+!
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnor,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fnor %f4,%f10,%f0
+ .end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fnors %f4,%f10,%f0
+ .end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fandnot,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fandnot1 %f4,%f10,%f0
+ .end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fandnots,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fandnot1s %f4,%f10,%f0
+ .end
+!
+! double vis_fnot(double /*frs1*/);
+!
+ .inline vis_fnot,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fnot1 %f4,%f0
+ .end
+!
+! float vis_fnots(float /*frs1*/);
+!
+ .inline vis_fnots,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ fnot1s %f4,%f0
+ .end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxor,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fxor %f4,%f10,%f0
+ .end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fxors %f4,%f10,%f0
+ .end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnand,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fnand %f4,%f10,%f0
+ .end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnands,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fnands %f4,%f10,%f0
+ .end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fand,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fand %f4,%f10,%f0
+ .end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fands,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fands %f4,%f10,%f0
+ .end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxnor,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fxnor %f4,%f10,%f0
+ .end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxnors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fxnors %f4,%f10,%f0
+ .end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+ .inline vis_fsrc,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fsrc1 %f4,%f0
+ .end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+ .inline vis_fsrcs,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ fsrc1s %f4,%f0
+ .end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fornot,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fornot1 %f4,%f10,%f0
+ .end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fornots,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fornot1s %f4,%f10,%f0
+ .end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_for,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ for %f4,%f10,%f0
+ .end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fors %f4,%f10,%f0
+ .end
+!
+! double vis_fzero(/* void */)
+!
+ .inline vis_fzero,0
+ fzero %f0
+ .end
+!
+! float vis_fzeros(/* void */)
+!
+ .inline vis_fzeros,0
+ fzeros %f0
+ .end
+!
+! double vis_fone(/* void */)
+!
+ .inline vis_fone,0
+ fone %f0
+ .end
+!
+! float vis_fones(/* void */)
+!
+ .inline vis_fones,0
+ fones %f0
+ .end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+!
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8P,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8PL,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL
+ .end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+ .inline vis_stdfa_ASI_PST8P_int_pair,16
+ ld [%o0],%f4
+ ld [%o1],%f5
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8S,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S
+ .end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16P,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P
+ .end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16S,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S
+ .end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32P,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P
+ .end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32S,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S
+ .end
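+!
+! A usage sketch for the masked stores (dst, dst_end, data are
+! illustrative; vis_pst_8 is the nickname for vis_stdfa_ASI_PST8P from
+! vis_proto.h):
+!
+!     int mask = vis_edge8(dst, dst_end);  /* enable only in-range bytes */
+!     vis_pst_8(data, dst, mask);          /* store at most 8 bytes */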
+
+!--------------------------------------------------------------------
+! Short store instructions
+!
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8P,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL8P_index,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8S,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd1 ! ASI_FL8_S
+ .end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16P,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL16P_index,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16S,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd3 ! ASI_FL16_S
+ .end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8PL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd8 ! ASI_FL8_PL
+ .end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8SL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd9 ! ASI_FL8_SL
+ .end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16PL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xda ! ASI_FL16_PL
+ .end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16SL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xdb ! ASI_FL16_SL
+ .end
+
+!--------------------------------------------------------------------
+! Short load instructions
+!
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8P,4
+ ldda [%o0]0xd0,%f4 ! ASI_FL8_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8P_index,8
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_hi,8
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_lo,8
+ sll %o1,16,%o1
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8S,4
+ ldda [%o0]0xd1,%f4 ! ASI_FL8_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16P,4
+ ldda [%o0]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16P_index,8
+ ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16S,4
+ ldda [%o0]0xd3,%f4 ! ASI_FL16_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8PL,4
+ ldda [%o0]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8PL_index,8
+ ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8SL,4
+ ldda [%o0]0xd9,%f4 ! ASI_FL8_SL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16PL,4
+ ldda [%o0]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16PL_index,8
+ ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16SL,4
+ ldda [%o0]0xdb,%f4 ! ASI_FL16_SL
+ fmovd %f4,%f0
+ .end
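+!
+! A usage sketch (src is illustrative): per the VIS spec the FL8 loads
+! place the byte right-justified in the destination register, so with
+! the vis_proto.h nicknames:
+!
+!     double d = vis_ld_u8(src);  /* low byte of d = *(unsigned char *)src */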
+
+!--------------------------------------------------------------------
+! Graphics status register
+!
+! unsigned int vis_read_gsr(void)
+!
+ .inline vis_read_gsr,0
+ rd %gsr,%o0
+ .end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+ .inline vis_write_gsr,4
+ wr %g0,%o0,%gsr
+ .end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+!
+! unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array8,12
+ sllx %o0,32,%o0
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array8 %o3,%o2,%o0
+ .end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array16,12
+ sllx %o0,32,%o0
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array16 %o3,%o2,%o0
+ .end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array32,12
+ sllx %o0,32,%o0
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array32 %o3,%o2,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+!
+! float vis_read_hi(double /* frs1 */);
+!
+ .inline vis_read_hi,8
+ std %o0,[%sp+0x48] ! store double frs1
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; return %f0;
+ .end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+ .inline vis_read_lo,8
+ std %o0,[%sp+0x48] ! store double frs1
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0;
+ .end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_hi,12
+ std %o0,[%sp+0x48] ! store double frs1;
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ st %o2,[%sp+0x44] ! store float frs2;
+ ld [%sp+0x44],%f2 ! %f2 = float frs2;
+ fmovs %f2,%f0 ! %f0 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_lo,12
+ std %o0,[%sp+0x48] ! store double frs1;
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ st %o2,[%sp+0x44] ! store float frs2;
+ ld [%sp+0x44],%f2 ! %f2 = float frs2;
+ fmovs %f2,%f1 ! %f1 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+ .inline vis_freg_pair,8
+ st %o0,[%sp+0x48] ! store float frs1
+ ld [%sp+0x48],%f0
+ st %o1,[%sp+0x48] ! store float frs2
+ ld [%sp+0x48],%f1
+ .end
+!
+! float vis_to_float(unsigned int /*value*/);
+!
+ .inline vis_to_float,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f0
+ .end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+ .inline vis_to_double,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ .end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+ .inline vis_to_double_dup,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f1
+ fmovs %f1,%f0 ! duplicate value
+ .end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+ .inline vis_ll_to_double,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ .end
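+!
+! A usage sketch for building packed constants (values illustrative):
+!
+!     double k = vis_to_double_dup(0x00ff00ff); /* 16-bit lanes: 0x00ff x4 */
+!     float  f = vis_to_float(0x01020304);      /* reinterpret the bits */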
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+!
+! unsigned int vis_read_asi(void)
+!
+ .inline vis_read_asi,0
+ rd %asi,%o0
+ .end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+ .inline vis_write_asi,4
+ wr %g0,%o0,%asi
+ .end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+!
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+ .inline vis_ldfa_ASI_REG,4
+ lda [%o0+0]%asi,%f4
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+ .inline vis_ldfa_ASI_P,4
+ lda [%o0]0x80,%f4 ! ASI_P
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+ .inline vis_ldfa_ASI_PL,4
+ lda [%o0]0x88,%f4 ! ASI_PL
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+ .inline vis_lddfa_ASI_REG,4
+ ldda [%o0+0]%asi,%f4
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+ .inline vis_lddfa_ASI_P,4
+ ldda [%o0]0x80,%f4 ! ASI_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_PL,4
+ ldda [%o0]0x88,%f4 ! ASI_PL
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_REG,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ sta %f4,[%o1+0]%asi
+ .end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_P,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ sta %f4,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_PL,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ sta %f4,[%o1]0x88 ! ASI_PL
+ .end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_REG,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2+0]%asi
+ .end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_P,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0x80 ! ASI_P
+ .end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_PL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0x88 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+ .inline vis_lduha_ASI_REG,4
+ lduha [%o0+0]%asi,%o0
+ .end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+ .inline vis_lduha_ASI_P,4
+ lduha [%o0]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+ .inline vis_lduha_ASI_PL,4
+ lduha [%o0]0x88,%o0 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_P_index,8
+ lduha [%o0+%o1]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_PL_index,8
+ lduha [%o0+%o1]0x88,%o0 ! ASI_PL
+ .end
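+!
+! A usage sketch for the _ASI_REG variants (src illustrative): they go
+! through whatever ASI was last written to the %asi register, e.g. 0x88
+! (ASI_PL) for little-endian access:
+!
+!     vis_write_asi(0x88);              /* ASI_PL */
+!     float f = vis_ldfa_ASI_REG(src);  /* load via the %asi set above */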
+
+!--------------------------------------------------------------------
+! Prefetch
+!
+! void vis_prefetch_read(void * /*address*/);
+!
+ .inline vis_prefetch_read,4
+	prefetch	[%o0+0],0	! fcn 0: prefetch for several reads
+ .end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+ .inline vis_prefetch_write,4
+	prefetch	[%o0+0],2	! fcn 2: prefetch for several writes
+ .end
diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il
new file mode 100644
index 0000000000..cbe2b5aa27
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_64.il
@@ -0,0 +1,997 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! This file is to be used in place of vis.il in 64-bit builds.
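+!
+! A hedged build sketch (assuming a Sun/Forte C compiler, where inline
+! template files are simply listed on the command line next to the
+! sources; foo.c is an illustrative file name):
+!
+!     cc -xarch=v9 -O -c foo.c vis_64.il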
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+!
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8,16
+ edge8 %o0,%o1,%o0
+ .end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8l,16
+ edge8l %o0,%o1,%o0
+ .end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16,16
+ edge16 %o0,%o1,%o0
+ .end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16l,16
+ edge16l %o0,%o1,%o0
+ .end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32,16
+ edge32 %o0,%o1,%o0
+ .end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32l,16
+ edge32l %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+!
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8cc,16
+	edge8	%o0,%o1,%o0
+	mov	0,%o1
+	movgu	%xcc,-1024,%o1	! force a negative result when the edge
+				! compare left "greater unsigned" in %xcc
+	or	%o1,%o0,%o0
+ .end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8lcc,16
+ edge8l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16cc,16
+ edge16 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16lcc,16
+ edge16l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32cc,16
+ edge32 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32lcc,16
+ edge32l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+!
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddr,12
+ alignaddr %o0,%o1,%o0
+ .end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddrl,12
+ alignaddrl %o0,%o1,%o0
+ .end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_faligndata,16
+ faligndata %f0,%f2,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+!
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple16,16
+ fcmple16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne16,16
+ fcmpne16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple32,16
+ fcmple32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne32,16
+ fcmpne32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt16,16
+ fcmpgt16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq16,16
+ fcmpeq16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt32,16
+ fcmpgt32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq32,16
+ fcmpeq32 %f0,%f2,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+!
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16,12
+ fmul8x16 %f1,%f2,%f0
+ .end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16_dummy,16
+ fmul8x16 %f1,%f4,%f0
+ .end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16au,8
+ fmul8x16au %f1,%f3,%f0
+ .end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16al,8
+ fmul8x16al %f1,%f3,%f0
+ .end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8sux16,16
+ fmul8sux16 %f0,%f2,%f0
+ .end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8ulx16,16
+ fmul8ulx16 %f0,%f2,%f0
+ .end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8sux16,8
+ fmuld8sux16 %f1,%f3,%f0
+ .end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8ulx16,8
+ fmuld8ulx16 %f1,%f3,%f0
+ .end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd16,16
+ fpadd16 %f0,%f2,%f0
+ .end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd16s,8
+ fpadd16s %f1,%f3,%f0
+ .end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd32,16
+ fpadd32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd32s,8
+ fpadd32s %f1,%f3,%f0
+ .end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub16,16
+ fpsub16 %f0,%f2,%f0
+ .end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub16s,8
+ fpsub16s %f1,%f3,%f0
+ .end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub32,16
+ fpsub32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub32s,8
+ fpsub32s %f1,%f3,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel packing
+!
+! float vis_fpack16(double /*frs2*/);
+!
+ .inline vis_fpack16,8
+ fpack16 %f0,%f0
+ .end
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpack16_pair,16
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ .end
+!
+! void vis_st2_fpack16(double, double, double *)
+!
+ .inline vis_st2_fpack16,24
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ st %f0,[%o2+0]
+ st %f1,[%o2+4]
+ .end
+!
+! void vis_std_fpack16(double, double, double *)
+!
+ .inline vis_std_fpack16,24
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ std %f0,[%o2]
+ .end
+!
+! void vis_st2_fpackfix(double, double, double *)
+!
+ .inline vis_st2_fpackfix,24
+ fpackfix %f0,%f0
+ fpackfix %f2,%f1
+ st %f0,[%o2+0]
+ st %f1,[%o2+4]
+ .end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack16_to_hi,16
+ fpack16 %f2,%f0
+ .end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack16_to_lo,16
+ fpack16 %f2,%f3
+ fmovs %f3,%f1 /* without this, optimizer goes wrong */
+ .end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack32,16
+ fpack32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+ .inline vis_fpackfix,8
+ fpackfix %f0,%f0
+ .end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpackfix_pair,16
+ fpackfix %f0,%f0
+ fpackfix %f2,%f1
+ .end
+
+!--------------------------------------------------------------------
+! Motion estimation
+!
+! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/,
+! double pxls2 /*frs2*/);
+!
+ .inline vis_pxldist64,24
+ pdist %f2,%f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Channel merging
+!
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpmerge,8
+ fpmerge %f1,%f3,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+!
+! double vis_fexpand(float /*frs2*/);
+!
+ .inline vis_fexpand,4
+ fexpand %f1,%f0
+ .end
+
+! double vis_fexpand_hi(double /*frs2*/);
+!
+ .inline vis_fexpand_hi,8
+ fexpand %f0,%f0
+ .end
+
+! double vis_fexpand_lo(double /*frs2*/);
+!
+ .inline vis_fexpand_lo,8
+ fexpand %f1,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+!
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnor,16
+ fnor %f0,%f2,%f0
+ .end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnors,8
+ fnors %f1,%f3,%f0
+ .end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fandnot,16
+ fandnot1 %f0,%f2,%f0
+ .end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fandnots,8
+ fandnot1s %f1,%f3,%f0
+ .end
+!
+! double vis_fnot(double /*frs1*/);
+!
+ .inline vis_fnot,8
+ fnot1 %f0,%f0
+ .end
+!
+! float vis_fnots(float /*frs1*/);
+!
+ .inline vis_fnots,4
+ fnot1s %f1,%f0
+ .end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxor,16
+ fxor %f0,%f2,%f0
+ .end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxors,8
+ fxors %f1,%f3,%f0
+ .end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnand,16
+ fnand %f0,%f2,%f0
+ .end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnands,8
+ fnands %f1,%f3,%f0
+ .end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fand,16
+ fand %f0,%f2,%f0
+ .end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fands,8
+ fands %f1,%f3,%f0
+ .end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxnor,16
+ fxnor %f0,%f2,%f0
+ .end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxnors,8
+ fxnors %f1,%f3,%f0
+ .end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+ .inline vis_fsrc,8
+ fsrc1 %f0,%f0
+ .end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+ .inline vis_fsrcs,4
+ fsrc1s %f1,%f0
+ .end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fornot,16
+ fornot1 %f0,%f2,%f0
+ .end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fornots,8
+ fornot1s %f1,%f3,%f0
+ .end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_for,16
+ for %f0,%f2,%f0
+ .end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fors,8
+ fors %f1,%f3,%f0
+ .end
+!
+! double vis_fzero(/* void */)
+!
+ .inline vis_fzero,0
+ fzero %f0
+ .end
+!
+! float vis_fzeros(/* void */)
+!
+ .inline vis_fzeros,0
+ fzeros %f0
+ .end
+!
+! double vis_fone(/* void */)
+!
+ .inline vis_fone,0
+ fone %f0
+ .end
+!
+! float vis_fones(/* void */)
+!
+ .inline vis_fones,0
+ fones %f0
+ .end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+!
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8P,20
+ stda %f0,[%o1]%o2,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8PL,20
+ stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL
+ .end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+ .inline vis_stdfa_ASI_PST8P_int_pair,28
+ ld [%o0],%f4
+ ld [%o1],%f5
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8S,20
+ stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S
+ .end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16P,20
+ stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P
+ .end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16S,20
+ stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S
+ .end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32P,20
+ stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P
+ .end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32S,20
+ stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S
+ .end
+
+!--------------------------------------------------------------------
+! Short store instructions
+!
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8P,16
+ stda %f0,[%o1]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL8P_index,24
+ stda %f0,[%o1+%o2]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8S,16
+ stda %f0,[%o1]0xd1 ! ASI_FL8_S
+ .end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16P,16
+ stda %f0,[%o1]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL16P_index,24
+ stda %f0,[%o1+%o2]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16S,16
+ stda %f0,[%o1]0xd3 ! ASI_FL16_S
+ .end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8PL,16
+ stda %f0,[%o1]0xd8 ! ASI_FL8_PL
+ .end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8SL,16
+ stda %f0,[%o1]0xd9 ! ASI_FL8_SL
+ .end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16PL,16
+ stda %f0,[%o1]0xda ! ASI_FL16_PL
+ .end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16SL,16
+ stda %f0,[%o1]0xdb ! ASI_FL16_SL
+ .end
+
+!--------------------------------------------------------------------
+! Short load instructions
+!
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8P,8
+ ldda [%o0]0xd0,%f4 ! ASI_FL8_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8P_index,16
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_hi,12
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_lo,12
+ sll %o1,16,%o1
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8S,8
+ ldda [%o0]0xd1,%f4 ! ASI_FL8_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16P,8
+ ldda [%o0]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16P_index,16
+ ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16S,8
+ ldda [%o0]0xd3,%f4 ! ASI_FL16_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8PL,8
+ ldda [%o0]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8PL_index,16
+ ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8SL,8
+ ldda [%o0]0xd9,%f4 ! ASI_FL8_SL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16PL,8
+ ldda [%o0]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16PL_index,16
+ ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16SL,8
+ ldda [%o0]0xdb,%f4 ! ASI_FL16_SL
+ fmovd %f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Graphics status register
+!
+! unsigned int vis_read_gsr(void)
+!
+ .inline vis_read_gsr,0
+ rd %gsr,%o0
+ .end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+ .inline vis_write_gsr,4
+ wr %g0,%o0,%gsr
+ .end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+!
+! unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array8,12
+ array8 %o0,%o1,%o0
+ .end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array16,12
+ array16 %o0,%o1,%o0
+ .end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array32,12
+ array32 %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+!
+! float vis_read_hi(double /* frs1 */);
+!
+ .inline vis_read_hi,8
+ fmovs %f0,%f0
+ .end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+ .inline vis_read_lo,8
+ fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0;
+ .end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_hi,12
+ fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_lo,12
+ fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+ .inline vis_freg_pair,8
+ fmovs %f1,%f0 ! %f1 = float frs1; put in hi;
+ fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1;
+ .end
+!
+! float vis_to_float(unsigned int /*value*/);
+!
+ .inline vis_to_float,4
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f0
+ .end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+ .inline vis_to_double,8
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f0
+ st %o1,[%sp+2183]
+ ld [%sp+2183],%f1
+ .end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+ .inline vis_to_double_dup,4
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f1
+ fmovs %f1,%f0 ! duplicate value
+ .end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+ .inline vis_ll_to_double,8
+ stx %o0,[%sp+2183]
+ ldd [%sp+2183],%f0
+ .end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+!
+! unsigned int vis_read_asi(void)
+!
+ .inline vis_read_asi,0
+ rd %asi,%o0
+ .end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+ .inline vis_write_asi,4
+ wr %g0,%o0,%asi
+ .end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+!
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+ .inline vis_ldfa_ASI_REG,8
+ lda [%o0+0]%asi,%f4
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+ .inline vis_ldfa_ASI_P,8
+ lda [%o0]0x80,%f4 ! ASI_P
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+ .inline vis_ldfa_ASI_PL,8
+ lda [%o0]0x88,%f4 ! ASI_PL
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+ .inline vis_lddfa_ASI_REG,8
+ ldda [%o0+0]%asi,%f4
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+ .inline vis_lddfa_ASI_P,8
+ ldda [%o0]0x80,%f4 ! ASI_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_PL,8
+ ldda [%o0]0x88,%f4 ! ASI_PL
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_REG,12
+ sta %f1,[%o1+0]%asi
+ .end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_P,12
+ sta %f1,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_PL,12
+ sta %f1,[%o1]0x88 ! ASI_PL
+ .end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_REG,16
+ stda %f0,[%o1+0]%asi
+ .end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_P,16
+ stda %f0,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_PL,16
+ stda %f0,[%o1]0x88 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+ .inline vis_lduha_ASI_REG,8
+ lduha [%o0+0]%asi,%o0
+ .end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+ .inline vis_lduha_ASI_P,8
+ lduha [%o0]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+ .inline vis_lduha_ASI_PL,8
+ lduha [%o0]0x88,%o0 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_P_index,16
+ lduha [%o0+%o1]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_PL_index,16
+ lduha [%o0+%o1]0x88,%o0 ! ASI_PL
+ .end
+
+!--------------------------------------------------------------------
+! Prefetch
+!
+! void vis_prefetch_read(void * /*address*/);
+!
+ .inline vis_prefetch_read,8
+	prefetch	[%o0+0],0	! fcn 0: prefetch for several reads
+ .end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+ .inline vis_prefetch_write,8
+	prefetch	[%o0+0],2	! fcn 2: prefetch for several writes
+ .end
diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h
new file mode 100644
index 0000000000..275de59df8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_proto.h
@@ -0,0 +1,234 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Prototypes for the inline templates in vis.il (or vis_64.il in
+ * 64-bit builds)
+ */
+
+#ifndef VIS_PROTO_H
+#define VIS_PROTO_H
+
+#pragma ident "@(#)vis_proto.h 1.3 97/03/30 SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Pure edge handling instructions */
+int vis_edge8(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32l(void * /*frs1*/, void * /*frs2*/);
+
+/* Edge handling instructions with negative return values if cc set. */
+int vis_edge8cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/);
+
+/* Alignment instructions. */
+void *vis_alignaddr(void * /*rs1*/, int /*rs2*/);
+void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/);
+double vis_faligndata(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned comparison instructions. */
+int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned multiplication. */
+#if 0
+double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+#endif
+double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+
+/* Partitioned addition & subtraction. */
+double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+
+/* Pixel packing & clamping. */
+float vis_fpack16(double /*frs2*/);
+double vis_fpack32(double /*frs1*/, double /*frs2*/);
+float vis_fpackfix(double /*frs2*/);
+
+/* Combined pack ops. */
+double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+void vis_st2_fpack16(double, double, double *);
+void vis_std_fpack16(double, double, double *);
+void vis_st2_fpackfix(double, double, double *);
+
+double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+
+/* Motion estimation. */
+double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+
+/* Channel merging. */
+double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+
+/* Pixel expansion. */
+double vis_fexpand(float /*frs2*/);
+double vis_fexpand_hi(double /*frs2*/);
+double vis_fexpand_lo(double /*frs2*/);
+
+/* Bitwise logical operators. */
+double vis_fnor(double /*frs1*/, double /*frs2*/);
+float vis_fnors(float /*frs1*/, float /*frs2*/);
+double vis_fandnot(double /*frs1*/, double /*frs2*/);
+float vis_fandnots(float /*frs1*/, float /*frs2*/);
+double vis_fnot(double /*frs1*/);
+float vis_fnots(float /*frs1*/);
+double vis_fxor(double /*frs1*/, double /*frs2*/);
+float vis_fxors(float /*frs1*/, float /*frs2*/);
+double vis_fnand(double /*frs1*/, double /*frs2*/);
+float vis_fnands(float /*frs1*/, float /*frs2*/);
+double vis_fand(double /*frs1*/, double /*frs2*/);
+float vis_fands(float /*frs1*/, float /*frs2*/);
+double vis_fxnor(double /*frs1*/, double /*frs2*/);
+float vis_fxnors(float /*frs1*/, float /*frs2*/);
+double vis_fsrc(double /*frs1*/);
+float vis_fsrcs(float /*frs1*/);
+double vis_fornot(double /*frs1*/, double /*frs2*/);
+float vis_fornots(float /*frs1*/, float /*frs2*/);
+double vis_for(double /*frs1*/, double /*frs2*/);
+float vis_fors(float /*frs1*/, float /*frs2*/);
+double vis_fzero(void);
+float vis_fzeros(void);
+double vis_fone(void);
+float vis_fones(void);
+
+/* Partial stores. */
+void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/,
+ void * /*rs3*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+
+/* Byte & short stores. */
+void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/);
+
+/* Byte & short loads. */
+double vis_lddfa_ASI_FL8P(void * /*rs1*/);
+double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8S(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL16S(void * /*rs1*/);
+double vis_lddfa_ASI_FL8PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL8SL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16SL(void * /*rs1*/);
+
+/* Direct write to GSR, read from GSR */
+void vis_write_gsr(unsigned int /*GSR*/);
+unsigned int vis_read_gsr(void);
+
+/* Voxel texture mapping. */
+#if !defined(_NO_LONGLONG)
+unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Register aliasing and type casts. */
+float vis_read_hi(double /*frs1*/);
+float vis_read_lo(double /*frs1*/);
+double vis_write_hi(double /*frs1*/, float /*frs2*/);
+double vis_write_lo(double /*frs1*/, float /*frs2*/);
+double vis_freg_pair(float /*frs1*/, float /*frs2*/);
+float vis_to_float(unsigned int /*value*/);
+double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+double vis_to_double_dup(unsigned int /*value*/);
+#if !defined(_NO_LONGLONG)
+double vis_ll_to_double(unsigned long long /*value*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Miscellany (no inlines) */
+void vis_error(char * /*fmt*/, int /*a0*/);
+void vis_sim_init(void);
+
+/* For better performance: the dummy int argument shifts the double into an
+   even/odd register pair (%o2/%o3) in the 32-bit ABI, so the inline template
+   can spill it with a single std instead of two st instructions (compare
+   vis_fmul8x16 and vis_fmul8x16_dummy in vis.il). */
+#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg))
+
+/* Nicknames for explicit ASI loads and stores. */
+#define vis_st_u8 vis_stdfa_ASI_FL8P
+#define vis_st_u8_i vis_stdfa_ASI_FL8P_index
+#define vis_st_u8_le vis_stdfa_ASI_FL8PL
+#define vis_st_u16 vis_stdfa_ASI_FL16P
+#define vis_st_u16_i vis_stdfa_ASI_FL16P_index
+#define vis_st_u16_le vis_stdfa_ASI_FL16PL
+
+#define vis_ld_u8 vis_lddfa_ASI_FL8P
+#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index
+#define vis_ld_u8_le vis_lddfa_ASI_FL8PL
+#define vis_ld_u16 vis_lddfa_ASI_FL16P
+#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index
+#define vis_ld_u16_le vis_lddfa_ASI_FL16PL
+
+#define vis_pst_8 vis_stdfa_ASI_PST8P
+#define vis_pst_16 vis_stdfa_ASI_PST16P
+#define vis_pst_32 vis_stdfa_ASI_PST32P
+
+#define vis_st_u8s vis_stdfa_ASI_FL8S
+#define vis_st_u8s_le vis_stdfa_ASI_FL8SL
+#define vis_st_u16s vis_stdfa_ASI_FL16S
+#define vis_st_u16s_le vis_stdfa_ASI_FL16SL
+
+#define vis_ld_u8s vis_lddfa_ASI_FL8S
+#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL
+#define vis_ld_u16s vis_lddfa_ASI_FL16S
+#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL
+
+#define vis_pst_8s vis_stdfa_ASI_PST8S
+#define vis_pst_16s vis_stdfa_ASI_PST16S
+#define vis_pst_32s vis_stdfa_ASI_PST32S
+
+/* "<" and ">=" may be implemented in terms of ">" and "<=". */
+#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a))
+#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a))
+#define vis_fcmpge16(a, b) vis_fcmple16((b), (a))
+#define vis_fcmpge32(a, b) vis_fcmple32((b), (a))
+
+#ifdef __cplusplus
+} // End of extern "C"
+#endif /* __cplusplus */
+
+#endif /* VIS_PROTO_H */