diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/third_party/libgcrypt/mpi | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/third_party/libgcrypt/mpi')
165 files changed, 25719 insertions, 0 deletions
diff --git a/comm/third_party/libgcrypt/mpi/ChangeLog-2011 b/comm/third_party/libgcrypt/mpi/ChangeLog-2011 new file mode 100644 index 0000000000..1e07872180 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/ChangeLog-2011 @@ -0,0 +1,831 @@ +2011-12-01 Werner Koch <wk@g10code.com> + + NB: ChangeLog files are no longer manually maintained. Starting + on December 1st, 2011 we put change information only in the GIT + commit log, and generate a top-level ChangeLog file from logs at + "make dist". See doc/HACKING for details. + +2011-07-04 Werner Koch <wk@g10code.com> + + * longlong.h (add_ssaaaa) [__arm__]: Do no use asm if thumb code + generation is enabled. This is bug#1202. Reported for gpg 1.4. + +2011-03-28 Werner Koch <wk@g10code.com> + + * mpi-pow.c (gcry_mpi_powm): Remove unused var RSEC. + +2011-02-01 Werner Koch <wk@g10code.com> + + * mpi-cmp.c (gcry_mpi_cmp): Allow comparing of opaque MPIs. + +2010-04-12 Brad Hards <bradh@frogmouth.net> (wk) + + Spelling fixes. + +2010-02-22 Aurelien Jarno <aurel32@debian.org> (wk) + + * longlong.h (umul_ppmm) <mips> [__GNUC__ >= 4.4]: Patch according + to recommended gcc 4.4 changes. + +2009-12-09 Werner Koch <wk@g10code.com> + + * config.links: Remove asm modules for all sparc64. This is + debian#560028. + +2009-05-26 Werner Koch <wk@g10code.com> + + * mpicoder.c (mpi_read_from_buffer): Allow zero-sized MPIs (i.e a + zero). + +2009-02-16 Werner Koch <wk@g10code.com> + + * mpiutil.c: Remove memory.h. + +2008-12-05 Werner Koch <wk@g10code.com> + + * mpicoder.c (mpi_read_from_buffer): Do not bail out if the mpi is + larger than the buffer (potential problem). Do not print error + messages. + (mpi_fromstr): Return an error instead of hitting an assert. + (gcry_mpi_scan) <PGP>: Fix potential double free problem. + (gcry_mpi_scan) <HEX>: Fix potential memory leak. + (do_get_buffer): Return NULL on memory allocation failure. + (gcry_mpi_print): Check result of do_get_buffer. 
+ (gcry_mpi_aprint): Return error on a memory allocation failure. + + * mpicoder.c: Re-indent. + +2008-12-03 Werner Koch <wk@g10code.com> + + * mpi-pow.c (gcry_mpi_powm): Fix last change. Asserts are really + useful! + +2008-12-02 Werner Koch <wk@g10code.com> + + * mpi-pow.c (gcry_mpi_powm): Re-indent. + (gcry_mpi_powm): Simplified allocation of the result to fix a + double free bug. This is bug#977. Reported by Haakon Ringberg. + +2008-08-20 Werner Koch <wk@g10code.com> + + * mpi-bit.c (gcry_mpi_lshift): Actually implement. + +2008-08-19 Werner Koch <wk@g10code.com> + + * mpi-bit.c (gcry_mpi_lshift): New. + +2007-10-31 Werner Koch <wk@g10code.com> + + * mpi-mod.c (gcry_mpi_mod): Remove + * mpi-inv.c (_gcry_mpi_invm): Remove _ prefix. + * mpiutil.c (_gcry_mpi_swap): Remove. + (_gcry_mpi_new): Remove. + (_gcry_mpi_snew): Remove. + (gcry_mpi_invm): Remove. + (gcry_mpi_copy): Remove and rename _version to this. + (gcry_mpi_set, gcry_mpi_set_ui): Merge with _ version. + * mpi-inv.c (gcry_mpi_invm): Remove _ prefix and return 1. + * mpi-mul.c (gcry_mpi_mul_2exp): Remove and rename _ version to this. + +2007-10-29 Werner Koch <wk@g10code.com> + + * config.links: No Candadian Cross here, thus use $host instead of + $target. + +2007-10-26 Werner Koch <wk@g10code.com> + + * config.links (mpi_optional_modules): Special rules for Apple + Darwin on ia32 from Gregor Riepl. + +2007-05-09 Marcus Brinkmann <marcus@g10code.de> + + * config.links: Rename assembler file links by suffixing "-asm". + * Makefile.am (CCASCOMPILE, LTCCASCOMPILE, CLEANFILES, + libmpi_la_LIBADD, libmpi_la_DEPENDENCIES, SUFFIXES, .S.o, .S.obj, + .S.lo): Removed variables and targets. + (mpih_add1, mpih_sub1, mpih_mul1, mpih_mul2, mpih_mul3, + mpih_lshift, mpih_rshift, mpih_udiv, mpih_udiv_qrnnd, + nodist_libmpi_la_SOURCES): New variables. + (DISTCLEANFILES): Rename assembler file links by suffixing "-asm". + Add variants for C file links. 
+ +2007-05-04 Werner Koch <wk@g10code.com> + + * config.links (path): Allow the use of colons as delimiters. + +2007-05-03 Werner Koch <wk@g10code.com> + + * pentium4/distfiles: Fixed. + +2007-04-30 Werner Koch <wk@g10code.com> + + * config.links: Create a file mod-source-info.h. + * Makefile.am (DISTCLEANFILES): Add that file. + * mpiutil.c (_gcry_mpi_get_hw_config): New. + +2007-04-28 Marcus Brinkmann <marcus@g10code.de> + + * config.links: Add additional assembler search directories. + +2007-03-28 Werner Koch <wk@g10code.com> + + * ec.c: New. + +2007-03-23 Werner Koch <wk@g10code.com> + + * mpi-bit.c (_gcry_mpi_lshift_limbs): Assign AP after the resize. + + * mpi-div.c (gcry_mpi_mod, _gcry_mpi_mod): Moved to .. + * mpi-mod.c: .. new file. + (_gcry_mpi_barrett_init, _gcry_mpi_barrett_free): New. + (_gcry_mpi_mod_barrett): New. + (_gcry_mpi_mul_barrett): New. + +2007-03-22 Werner Koch <wk@g10code.com> + + * mpi-div.c (_gcry_mpi_mod): New. + * mpiutil.c (_gcry_mpi_new, _gcry_mpi_snew): New. + +2007-03-13 Werner Dittmann <Werner.Dittmann@t-online.de> (wk) + + * amd64/mpih-add1.S, amd64/mpih-add1.S, amd64/mpih-lshift.S + * amd64/mpih-mul1.S, amd64/mpih-mul2.S, amd64/mpih-mul3.S + * amd64/mpih-rshift.S, amd64/mpih-sub1.S: New. + * config.links: Add case for x86_64. + +2007-02-23 Werner Koch <wk@g10code.com> + + * mpi-pow.c (gcry_mpi_powm): Remove unused var ESIGN. + + * mpiutil.c (gcry_mpi_get_flag): Let it return a value to silence + MIPSpro cc warning. + +2007-02-21 Werner Koch <wk@g10code.com> + + * mpicoder.c (_gcry_mpi_set_buffer): Made BUFFER a void*. + +2006-11-15 Werner Koch <wk@g10code.com> + + * Makefile.am (.S.o): Check for srcdir also in CPP pass. + (INCLUDES): Removed. + (AM_CPPFLAGS, AM_CFLAGS): New, modified. Merged with Moritz' + changes. + +2006-11-05 Moritz Schulte <moritz@g10code.com> + + * Makefile.am (AM_CFLAGS): Added -I$(top_builddir)/src so that the + new gcrypt.h is used, not the one installed in the system.
+ +2006-10-23 Werner Koch <wk@g10code.com> + + * config.links (mpi_optional_modules): Make sure that powerpc64 is + matched before a generic powerpc. Reported by Andreas Metzler. + Should fix Debian bug 284609. + +2006-08-25 Werner Koch <wk@g10code.com> + + * mpi-bit.c (gcry_mpi_rshift): Don't shift if N == 0 but do a + plain copy. + +2006-08-04 Werner Koch <wk@g10code.com> + + * mpi-bit.c (gcry_mpi_rshift): Rewritten to remove the limitation + on N (which used to be less than BITS_PER_MPI_LIMB). + +2006-08-03 Werner Koch <wk@g10code.com> + + * mpi-bit.c (gcry_mpi_set_bit, gcry_mpi_set_highbit): Fixed + allocation. Reported by bpgcrypt at itaparica.org. + * mpiutil.c (_gcry_mpi_resize): Clear the new part of the resized + limb space. + +2006-07-26 Werner Koch <wk@g10code.com> + + * mpiutil.c (gcry_mpi_randomize): Changed P to unsigned char*. + + * mpicoder.c (gcry_mpi_scan): Changed arg BUFFER to void*. + (mpi_read_from_buffer): Made BUFFER arg const. + (gcry_mpi_scan): Removed now needless cast. Add cast for arg to + mpi_fromstr. + (gcry_mpi_print): Made TMP unsigned. + + * Makefile.am (AM_CCASFLAGS): New. + +2005-10-09 Moritz Schulte <moritz@g10code.com> + + * mpi-cmp.c (gcry_mpi_cmp_ui): Rewritten; correctly handle case of + zero limbs in U. + +2005-04-27 Moritz Schulte <moritz@g10code.com> + + * mpiutil.c (gcry_mpi_randomize): Store random data in secure + memory if the given MPI is secure - not the other way around (argl). + +2005-04-23 Moritz Schulte <moritz@g10code.com> + + * Makefile.am: Don't assume the compiler will pre-process the .S + files. Some compilers, like those from HP and IBM, don't do + this. So, we use the same solution gnupg-1.4.0 does. Preprocess + first and then compile. + + * hppa1.1/mpih-mul3.S: Add "level 1.1" directive to disable + warning about using PA-RISC1.1 opcodes. + * hppa1.1/mpih-mul2.S: Likewise. + * hppa1.1/mpih-mul1.S: Likewise. + * hppa1.1/udiv-qrnnd.S: Likewise. 
+ +2005-02-16 Moritz Schulte <moritz@g10code.com> + + * mpiutil.c (_gcry_mpi_alloc_limb_space): Rewritten, fixed memory + corruption. + +2005-02-06 Moritz Schulte <moritz@g10code.com> + + * mpiutil.c (_gcry_mpi_get_ui, gcry_mpi_get_ui): New functions. + +2005-01-05 Werner Koch <wk@g10code.com> + + * hppa1.1/udiv-qrnnd.S: Reverted change of 2004-03-02 but kept the + .align directive. + +2004-12-16 Werner Koch <wk@g10code.com> + + * config.links (mpi_optional_modules): Move entry for powerpc64 + before generic powerpc. Suggested by Rafael Ávila de Espíndola. + +2004-03-02 Werner Koch <wk@gnupg.org> + + * hppa1.1/udiv-qrnnd.S: Alignment fix from Lamont Jones for + Debian. Taken from gnupg-1.3. + + * longlong.h: Added PowerPC 64 bit code from GPM-4.1.2 but didn't + enable it yet. Some whitespace changes in HPPA to fix assembler + problems on HP-UX. From gnupg 1.3 + + * mpiutil.c (_gcry_mpi_alloc_limb_space): Better allocate + something even if NLIMBS is passed as 0. + + * config.links: Updated system list to match gnupg 1.3. + +2003-12-19 Werner Koch <wk@gnupg.org> + + * mpi-internal.h [M_DEBUG]: Removed this unused code. + (struct karatsuba_ctx): Added TSPACE_NLIMBS and TP_NLIMBS. + * mpiutil.c (_gcry_mpi_free_limb_space): Add arg NLIMBS and wipe + out the memory. Changed all callers. + * mpih-mul.c (_gcry_mpih_mul_karatsuba_case): Keep track of + allocated limbs. + * mpi-div.c (_gcry_mpi_tdiv_qr): Keep track of allocated limbs. + * mpi-mul.c (gcry_mpi_mul): Ditto. + * mpi-pow.c (gcry_mpi_powm): Ditto. + + * Manifest: Empty new file. Also add Manifest files to all CPU + specific directories. + * Makefile.am: Added. + + * mpiutil.c (gcry_mpi_randomize): Use gcry_create_nonce if WEAK + random has been requested. + +2003-10-31 Werner Koch <wk@gnupg.org> + + * i386/mpih-rshift.S, i386/mpih-lshift.S: Use %dl and not %edx for + testb; this avoids an assembler warning. + + * mpi-pow.c (gcry_mpi_powm): s/exp/expo/ to avoid shadowing warning. 
+ +2003-08-19 Marcus Brinkmann <marcus@g10code.de> + + * Makefile.am (SUFFIXES): New variable. + (.S.o, .S.lo, .S.obj): Rewritten. + +2003-07-30 Moritz Schulte <moritz@g10code.com> + + * longlong.h (__clz_tab): Renamed to _gcry_clz_tab. + * mpi-bit.c (__clz_tab): Likewise. + +2003-07-27 Werner Koch <wk@gnupg.org> + + * mpicoder.c (gcry_mpi_scan): New argument BUFLEN to replace the + use of the initial value of NBYTES. Changed BUFFER to unsigned. + (gcry_mpi_print): Likewise. + (gcry_mpi_dump): New. + (_gcry_log_mpidump): Make use of gcry_mpi_dump. + (mpi_print): Removed. + (gcry_mpi_scan): Allocated mpi in secure memory when required. + (gcry_mpi_aprint): Changed BUFFER to unsigned char*. + +2003-07-14 Moritz Schulte <moritz@g10code.com> + + * mpicoder.c: Used gcry_err* wrappers for libgpg-error symbols. + +2003-06-16 Moritz Schulte <moritz@g10code.com> + + * mpi-add.c: Replace last occurrences of old type names with newer + names (i.e. replace MPI with gcry_mpi_t). + * mpi-bit.c: Likewise. + * mpi-cmp.c: Likewise. + * mpi-div.c: Likewise. + * mpi-gcd.c: Likewise. + * mpi-internal.h: Likewise. + * mpi-inv.c: Likewise. + * mpi-mpow.c: Likewise. + * mpi-mul.c: Likewise. + * mpi-pow.c: Likewise. + * mpi-scan.c: Likewise. + * mpicoder.c: Likewise. + * mpiutil.c: Likewise. + +2003-06-09 Moritz Schulte <moritz@g10code.com> + + * mpicoder.c (gcry_mpi_scan): Adjust for libgpg-error. + (gcry_mpi_print): Likewise. + (gcry_mpi_aprint): Likewise.
+ +2003-06-07 Moritz Schulte <moritz@g10code.com> + + * longlong.h, mpi-add.c, mpi-bit.c, mpi-cmp.c, mpi-div.c, + mpi-gcd.c, mpi-inline.c, mpi-inline.h, mpi-internal.h, mpi-inv.c, + mpi-mpow.c, mpi-mul.c, mpi-pow.c, mpi-scan.c, mpicoder.c, + mpih-div.c, mpih-mul.c, mpiutil.c, generic/mpi-asm-defs.h, + generic/mpih-add1.c, generic/mpih-lshift.c, generic/mpih-mul1.c, + generic/mpih-mul2.c, generic/mpih-mul3.c, generic/mpih-rshift.c, + generic/mpih-sub1.c, generic/udiv-w-sdiv.c, i386/syntax.h, + m68k/syntax.h, mips3/mpi-asm-defs.h, powerpc32/syntax.h: Edited + all preprocessor instructions to remove whitespace before the '#'. + This is not required by C89, but there are some compilers out + there that don't like it. Replaced any occurence of the now + deprecated type names with the new ones. + +2003-05-21 Moritz Schulte <moritz@g10code.com> + + * mpiutil.c (_gcry_mpi_alloc_limb_space): Only try to allocate + memory in case the amount of bytes to allocate is non-zero. + +2003-04-27 Moritz Schulte <moritz@g10code.com> + + * mpiutil.c (_gcry_mpi_resize): Allocate secure memory, in case + bit zero of `flags' is set. + + * mpi-add.c (gcry_mpi_sub): Simplify function; always use a + temporary variable now. + +2003-04-15 Werner Koch <wk@gnupg.org> + + * longlong.h (umul_ppmm): Support SH3 and SH4. Thanks to + kazuya.s@jp.yokogawa.com. + +2003-04-02 Werner Koch <wk@gnupg.org> + + * mpicoder.c (gcry_mpi_print): Fixed testing against possible + uninitialized LEN. Valgrinded by Nikos Mavroyanopoulos. + +2003-01-15 Werner Koch <wk@gnupg.org> + + * longlong.h: Removed some spaces between backslashes and newlines. + +2002-09-20 Werner Koch <wk@gnupg.org> + + * mpi-mul.c (gcry_mpi_mul_2exp): New. This was declared in + gcrypt.h but only implemented as internal function. Noted by Timo + but a few minutes to late for today's release. + + * Makefile.am (DISTCLEANFILES): Include mpi-asm-defs.h + +2002-09-18 Werner Koch <wk@gnupg.org> + + * Makefile.am (.S.lo): Pass -DPIC. 
i386, PPC and Sparc code + require it. It worked for me because I am using the i586 code. + +2002-08-23 Werner Koch <wk@gnupg.org> + + * Makefile.am (.S.lo): Fixed for libtool build with --disable-shared. + +2002-07-24 Werner Koch <wk@gnupg.org> + + * longlong.h: Replaced all K&R multiline strings by ISO ones for + the sake of modern compilers. Suggested by Marco Parrone. + +2002-06-24 Werner Koch <wk@gnupg.org> + + * mpiutil.c (gcry_mpi_swap): New. + + * mpi-div.c (gcry_mpi_div): New. + (gcry_mpi_mod): New. + * mpi-inv.c (gcry_mpi_invm): New. + + * mpicoder.c (do_get_buffer): Make sure that we allocate at least + one byte. + +2002-06-12 Werner Koch <wk@gnupg.org> + + * hppa1.1/udiv-qrnnd.S: Changes for PIC by Randolph Chung. + +2002-05-15 Werner Koch <wk@gnupg.org> + + * config.links: Change the way the mpi modules are determined. + * Makefile.am: Revamped to better handle modules. + +2002-05-14 Werner Koch <wk@gnupg.org> + + Changed license of all files to the LGPL. + +2002-04-18 Werner Koch <wk@gnupg.org> + + * mpicoder.c (gcry_mpi_scan): Don't use normalize on a NULL MPI. + +2002-03-20 Werner Koch <wk@gnupg.org> + + * mpicoder.c (mpi_read_from_buffer): Bail out on a zero length + buffer because we can't eventually do a malloc of this size. + Reported by Timo. + +2002-01-14 Werner Koch <wk@gnupg.org> + + * mpi-inv.c (_gcry_mpi_invm): Typo fixes, noted by Carlo Perassi. + +2001-11-01 Werner Koch <wk@gnupg.org> + + * mpicoder.c (gcry_mpi_scan): Allow to pass a nbytes as NULL or + with value 0 for format GCRY_FMT_SSH, so that the length is not + used for any checks, only the length stored in the buffer is used. + This is a nice format because we can just pass a buffer around and + don't need to care about its length. + +2001-08-03 Werner Koch <wk@gnupg.org> + + * config.links: Changed the way the list of files to be + symlinked is returned. + +2001-05-31 Werner Koch <wk@gnupg.org> + + * mpih-cmp.c: Removed and moved mpihelp_cmp to .. + * mpi-inline.h: .. here.
+ + Major function renaming. All global functions are now prefixed + with _gcry_ or gcry_. Renamed also all mpihelp_ to just mpih_ so + that function names are not getting too long and unreadable and for + better matching with the filenames. + +2001-05-28 Werner Koch <wk@gnupg.org> + + * mpicoder.c (mpi_fromstr): Made static and assume that all input + is in hex format. + + Updated all CPU specific code with the one from GnuPG-1.0.5. This + is just a change of text formatting and the use of .label + instead of labels for hppa and pa7100. + + * longlong.h: Fixes for ARM by Phil Blundell. + +2001-03-29 Werner Koch <wk@gnupg.org> + + * mpi-mul.c (mpi_mul): Make sure that secret temporary results are + not stored in w. Suggested by Florian Weimer. + + * config.links: Use i386 code for i386. According to tests by + Kevin Ryde the i586 code runs slow on i386 CPUs. Ditto for i786. + +2001-01-11 Werner Koch <wk@gnupg.org> + + * Makefile.am: Removed mpi.h. + +2000-12-19 Werner Koch <wk@gnupg.org> + + * mpi-internal.h: Put limb_t definition in an ifdef. + + Major change: + Removed all GnuPG stuff and renamed this piece of software + to gcrypt. + +2000-11-14 Werner Koch <wk@gnupg.org> + + * mpi-internal.h, mpi.h: Changed the way they are called and + introduced DID_MPI_LIMP_TYPEDEF hack. Very ugly, should all be + revamped. + + * Makefile.am (OMIT_DEPENDENCIES): Hack to work around dependency + problems. + +2000-10-11 Werner Koch <wk@gnupg.org> + + * generic/mpi-asm-defs.h: New. + * mips3/mpi-asm-defs.h: New. + * config.links: Create a link to one of the above files. + +Fri Jul 28 18:19:11 CEST 2000 Werner Koch <wk@openit.de> + + * mpicoder.c (gcry_mpi_scan): Normalize the returned MPI. + +Tue Jul 25 17:44:15 CEST 2000 Werner Koch <wk@openit.de> + + * config.links: Support for powerpc--netbsd by Gabriel Rosenkoetter. + +Mon Jul 17 16:35:47 CEST 2000 Werner Koch <wk@> + + * power/: Add all files from GMP for this CPU.
Converted comments to + CPP comments because some ASes complain about ' in comments. + + * config.links: Support for BSDI 4.x; by Wayne Chapeskie. Add support + for FreeBSD 5 and made the case stmt looking nicer; by Jun Kuriyama. + Add support for NetBSD. + (sparc8): Made the search path the same as sparc9 + (sparc64-unknown-linux-gnu): use udiv module; by Adam Mitchell. + + * Makefile.am: c/SFLAGS/ASFLAGS/. This has only been used by the + powerpc and actually never passed the -Wa,foo to the cc. + + * mpih-div.c (mpihelp_divrem): The MPN_COPY_DECR copied one element + too many. This is a gmp2.0.2p9.txt patch. + + * longlong.h (umul_ppmm): Fixes for ARM-4. By Sean MacLennan. + + * mpi-internal.h (karatsuba_ctx): New. + * mpih-mul.c (mpihelp_release_karatsuba_ctx): New. + (mpihelp_mul_karatsuba_case): New. + (mpihelp_mul): Splitted to make use of the new functions. + * mpi-pow.c (mpi_powm): Make use of the new splitted function to avoid + multiple allocation of temporary memory during the karatsuba operations. + * mpi_mpow.c: Removed the unused Barrett code. + +2000-03-21 16:17:30 Werner Koch (wk@habibti.openit.de) + + * config.links: Add support for FreeBSD 5. + +Mon Jan 24 22:24:38 CET 2000 Werner Koch <wk@gnupg.de> + + * mpicoder.c (gcry_mpi_aprint): Now really returns the length. + +Mon Jan 24 13:04:28 CET 2000 Werner Koch <wk@gnupg.de> + + * mpiutil.c: Removed all memory debugging code. + + * mpicoder.c (gcry_mpi_aprint): New. + + * Replaced all m_ memory functions by g10_ ones. + +Fri Dec 31 14:06:56 CET 1999 Werner Koch <wk@gnupg.de> + + * mpi-bit.c (gcry_mpi_get_nbits): New. + + * mpiutil.c (mpi_set_secure): made static. + (gcry_mpi_get_flag): New. + (gcry_mpi_set_flag): New. + (gcry_mpi_clear_flag): New. + (mpi_set_opaque): renamed to gcry_mpi_set_opaque. + (mpi_get_opaque): renamed to gcry_mpi_get_opaque. + +Fri Dec 31 12:48:31 CET 1999 Werner Koch <wk@gnupg.de> + + * mpicoder.c (mpi_read_from_buffer): Made static. 
+ (gcry_mpi_print): A buffer of NULL is now allowed to get the required + length back. + (mpi_get_keyid): Removed. + (mpi_print): Made static - should be removed. + +Wed Dec 8 21:58:32 CET 1999 Werner Koch <wk@gnupg.de> + + * Makefile.am (INCLUDES): Add ../gcrypt. + + * g10m.c : Removed. + + * mpicoder.c (mpi_write): Removed. + (mpi_read): Removed. + (gcry_mpi_scan): New. Taken from ../gcrypt/mpiapi.c. + (gcry_mpi_print): Ditto. + + * mpi-pow.c (mpi_powm): Renamed to ... + (gcry_mpi_powm): ... this. + + * mpiutil.c (gcry_mpi_new): New as a wrapper around the old function. + Taken from ../gcrypt/mpiapi.c. + (gcry_mpi_snew): Ditto. + (gcry_mpi_release): Ditto. + (gcry_mpi_copy): Ditto. + (gcry_mpi_set): Ditto. + (gcry_mpi_set_ui): Ditto. + (gcry_mpi_cmp): Ditto. + (gcry_mpi_cmp_ui): Ditto. + (gcry_mpi_randomize): Ditto. + + * mpicoder.c (mpi_print): Removed the nbit_info kludge. + * mpi-bits.c (mpi_get_nbits): Replaced the is_protected stuff by + checking whether it is an opaque mpi and then returns it's length + in bits. + * mpiutil.c (mpi_set_opaque): Changed the interface to take a number + of bits for the length. Adjusted all users. + (mpi_get_opaque): Ditto. + +Fri Nov 19 17:15:20 CET 1999 Werner Koch <wk@gnupg.de> + + * mpicoder.c (g10_log_mpidump): Add a temporary workaround + + * mpih-mul.c (mpihelp_mul_n): s/m_is_ecure/g10_is_secure/ + + * mpiutil.c (mpi_alloc): Remved the debug mode because it has turned + out, that this feature was not very useful in the past. Use the + new alloc functions. + (mpi_alloc_secure): Ditto. + (mpi_alloc_limb_space): Ditto. + (mpi_free_limb_space): Ditto. + (mpi_resize): Ditto. + (mpi_free): Ditto. + (mpi_set_secure): Removed the debug stuff. + (mpi_set_opaque): Ditto. + (mpi_copy): Ditto. + (mpi_alloc_set_ui): Ditto. + (mpi_m_check): Use g10_ wrapper. 
+ +Mon Aug 30 20:38:33 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + + * config.links: Add case label for DJGPP + +Wed Jul 14 19:42:08 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + + * Makefile.am: Use .s files as temporaries, disabled other .S rules. + +Wed Jul 7 13:08:40 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + + * mpicoder.c (g10_log_mpidump): New. + + * Makefile.am: Support for libtool. + +Fri Jul 2 11:45:54 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + + * mpi-bit.c (mpi_lshift_limbs,mpi_rshift_limbs): New. + * mpi-mpow.c (barrett_mulm): New but diabled. + +Tue Jun 1 16:01:46 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + * config.links (i[56]86*-*-freebsdelf*): New. + +Sun May 23 14:20:22 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + * config.links (sysdep.h): Not any more conditionally created. + +Tue May 4 15:47:53 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + * mpiutil.c (mpi_alloc_like): New. + +Mon Apr 26 17:48:15 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + * mpih-add.c, mpih-sub.c: Removed + * mpi-inline.c: New. + * mpi-inline.h: Make it usable by mpi-inline.c. + +Sun Apr 18 10:11:28 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + * mpih-mul.c (mpihelp_mul_n): Fixed use of memory region. + (mpihelp_mul): Ditto. + +Wed Apr 7 20:51:39 CEST 1999 Werner Koch <wk@isil.d.shuttle.de> + + * Makefile.am: Explicit rules to invoke cpp on *.S + +Mon Mar 8 20:47:17 CET 1999 Werner Koch <wk@isil.d.shuttle.de> + + * config.links: Take advantage of the with_symbol_underscore macro. + Add support for freebsd 4. + +Wed Feb 24 11:07:27 CET 1999 Werner Koch <wk@isil.d.shuttle.de> + + * mips3/mpih-sub1.S: Removed left over junk in last line. (Should I + blame me or my editor?). + +Sat Feb 13 12:04:43 CET 1999 Werner Koch <wk@isil.d.shuttle.de> + + * Makefile.am: Removed the +=. Add MPI_OPT_FLAGS. + +Sat Jan 9 16:02:23 CET 1999 Werner Koch <wk@isil.d.shuttle.de> + + * mpi-cmp.c (mpi_cmp_ui): Normalized the arg. 
+ +Thu Jan 7 18:00:58 CET 1999 Werner Koch <wk@isil.d.shuttle.de> + + * mpi-bit.c (mpi_normalize): New. + (mpi_get_nbits): Normalize the MPI. + * mpi-bit.c (mpi_cmp): Normalize the MPI before the compare. + + +Tue Dec 8 13:15:16 CET 1998 Werner Koch <wk@isil.d.shuttle.de> + + * config.links: Moved the case for powerpc*linux + * powerpcp32/*.S: Removed some underscores. + +Thu Nov 26 07:27:52 1998 Werner Koch <werner.koch@guug.de> + + * config.links: Support for ppc with ELF + * powerpc32/syntax.h: New. + * powerpc32/*.S: Applied ELF patches (glibc patches) + +Tue Nov 10 19:31:37 1998 Werner Koch (wk@isil.d.shuttle.de) + + * power*/ : Started with stuff for PPC + * config.links: Some stuff for PPC. + * generic/udiv-w-sdiv.c: New but disabled. + +Tue Oct 27 12:37:46 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links (freebsd): Fixes for FreeBSD 3.0 + +Wed Oct 14 09:59:30 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links (freebsd): ELF patches from Jun Kuriyama. + +Thu Oct 8 13:28:17 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpi-mpow.c (mpi_mulpowm): Fixed mem leak (m_free/mpi_free). + +Thu Sep 17 18:08:50 1998 Werner Koch (wk@(none)) + + * hppa1.1/udiv-qrnnd.S: Fix from Steffen Zahn for HPUX 10.20 + +Thu Aug 6 16:39:28 1998 Werner Koch,mobil,,, (wk@tobold) + + * mpi-bit.c (mpi_set_bytes): Removed. + +Wed Aug 5 15:11:12 1998 Werner Koch (wk@(none)) + + * mpicoder.c (mpi_read_from_buffer): New. + + * mpiutil.c (mpi_set_opaque): New. + (mpi_get_opaque): New. + (mpi_copy): Changed to support opauqe flag + (mpi_free): Ditto. + +Sat Jul 4 10:11:11 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpiutil.c (mpi_clear): Reset flags. + (mpi_set): Ditto. + (mpi_alloc_secure): Set flag to 1 and not ored the 1 in, tsss.. + +Fri Jun 26 11:19:06 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpiutil.c (mpi_alloc): set nbits to 0. + (mpi_alloc_secure): Ditto. + (mpi_clear): Ditto. 
+ +Thu Jun 25 11:50:01 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mips3/*.S: New + +Mon May 18 13:47:06 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links: split mpih-shift into mpih-[lr]shift and + changed all implementations. + * mpi/alpha: add some new assembler stuff. + +Wed May 13 11:04:29 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links: Add support for MIPS + +Thu Apr 9 11:31:36 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpicoder.c (mpi_get_secure_buffer): New. + +Wed Apr 8 09:44:33 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links: Applied small fix from Ulf Möller. + +Mon Apr 6 12:38:52 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpicoder.c (mpi_get_buffer): Removed returned leading zeroes + and changed all callers. + +Tue Mar 10 13:40:34 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpi-bit.c (mpi_clear_highbit): New. + +Mon Mar 2 19:29:00 1998 Werner Koch (wk@isil.d.shuttle.de) + + * Makefile.am (DISTCLEANFILES): New + +Thu Feb 26 06:48:54 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links (X86_BROKEN_ALIGN): Added for some systems. + +Mon Feb 23 12:21:40 1998 Werner Koch (wk@isil.d.shuttle.de) + + * mpi/m68k/mpih-shift.S (Lspecial): Changed duplicate symbol. + +Mon Feb 16 13:00:27 1998 Werner Koch (wk@isil.d.shuttle.de) + + * config.links : Add detection of m68k cpus + + + Copyright 1998,1999,2000,2001,2002,2003 Free Software Foundation, Inc. + + This file is free software; as a special exception the author gives + unlimited permission to copy and/or distribute it, with or without + modifications, as long as this notice is preserved. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY, to the extent permitted by law; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +Local Variables: +buffer-read-only: t +End: diff --git a/comm/third_party/libgcrypt/mpi/Makefile.am b/comm/third_party/libgcrypt/mpi/Makefile.am new file mode 100644 index 0000000000..d06594e18a --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/Makefile.am @@ -0,0 +1,179 @@ +## Process this file with automake to produce Makefile.in +# Copyright (C) 1992, 1999, 2000, 2002 Free Software Foundation, Inc. +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +# 1.5 leads to a combinatorial explosion due to all the conditionals +# I was not able to build it with 64Megs - 1.6 fixes this. +# not anymore required: AUTOMAKE_OPTIONS = 1.6 + +# Need to include ../src in addition to top_srcdir because gcrypt.h is +# a built header. 
+AM_CPPFLAGS = -I../src -I$(top_srcdir)/src +AM_CFLAGS = $(GPG_ERROR_CFLAGS) + +AM_ASFLAGS = $(MPI_SFLAGS) +AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) + +EXTRA_DIST = config.links +DISTCLEANFILES = mpi-asm-defs.h \ + mpih-add1-asm.S mpih-mul1-asm.S mpih-mul2-asm.S mpih-mul3-asm.S \ + mpih-lshift-asm.S mpih-rshift-asm.S mpih-sub1-asm.S asm-syntax.h \ + mpih-add1.c mpih-mul1.c mpih-mul2.c mpih-mul3.c \ + mpih-lshift.c mpih-rshift.c mpih-sub1.c \ + sysdep.h mod-source-info.h + +# Beware: The following list is not a comment but grepped by +# config.links to get the list of symlinked modules +# Optional modules are marked with an O in the second column. +#BEGIN_ASM_LIST +# mpih-add1 C +# mpih-sub1 C +# mpih-mul1 C +# mpih-mul2 C +# mpih-mul3 C +# mpih-lshift C +# mpih-rshift C +# udiv O +# udiv-qrnnd O +#END_ASM_LIST + +# Note: This function has not yet been implemented. There is only a dummy in +# generic/ +# udiv-w-sdiv O + +# And we need to have conditionals for all modules because +# we don't know whether they are .c or .S. Very ugly; I know. 
+# Remember to define them all in configure.ac +if MPI_MOD_ASM_MPIH_ADD1 +mpih_add1 = mpih-add1-asm.S +else +if MPI_MOD_C_MPIH_ADD1 +mpih_add1 = mpih-add1.c +else +mpih_add1 = +endif +endif + +if MPI_MOD_ASM_MPIH_SUB1 +mpih_sub1 = mpih-sub1-asm.S +else +if MPI_MOD_C_MPIH_SUB1 +mpih_sub1 = mpih-sub1.c +else +mpih_sub1 = +endif +endif + +if MPI_MOD_ASM_MPIH_MUL1 +mpih_mul1 = mpih-mul1-asm.S +else +if MPI_MOD_C_MPIH_MUL1 +mpih_mul1 = mpih-mul1.c +else +mpih_mul1 = +endif +endif + +if MPI_MOD_ASM_MPIH_MUL2 +mpih_mul2 = mpih-mul2-asm.S +else +if MPI_MOD_C_MPIH_MUL2 +mpih_mul2 = mpih-mul2.c +else +mpih_mul2 = +endif +endif + +if MPI_MOD_ASM_MPIH_MUL3 +mpih_mul3 = mpih-mul3-asm.S +else +if MPI_MOD_C_MPIH_MUL3 +mpih_mul3 = mpih-mul3.c +else +mpih_mul3 = +endif +endif + +if MPI_MOD_ASM_MPIH_LSHIFT +mpih_lshift = mpih-lshift-asm.S +else +if MPI_MOD_C_MPIH_LSHIFT +mpih_lshift = mpih-lshift.c +else +mpih_lshift = +endif +endif + +if MPI_MOD_ASM_MPIH_RSHIFT +mpih_rshift = mpih-rshift-asm.S +else +if MPI_MOD_C_MPIH_RSHIFT +mpih_rshift = mpih-rshift.c +else +mpih_rshift = +endif +endif + +if MPI_MOD_ASM_UDIV +udiv = udiv-asm.S +else +if MPI_MOD_C_UDIV +udiv = udiv.c +else +udiv = +endif +endif + +if MPI_MOD_ASM_UDIV_QRNND +udiv_qrnnd = udiv-qrnnd-asm.S +else +if MPI_MOD_C_UDIV_QRNND +udiv_qrnnd = udiv-qrnnd.c +else +udiv_qrnnd = +endif +endif + +noinst_LTLIBRARIES = libmpi.la + +libmpi_la_LDFLAGS = +nodist_libmpi_la_SOURCES = $(mpih_add1) $(mpih_sub1) $(mpih_mul1) \ + $(mpih_mul2) $(mpih_mul3) $(mpih_lshift) $(mpih_rshift) \ + $(udiv) $(udiv_qrnnd) +libmpi_la_SOURCES = longlong.h \ + mpi-add.c \ + mpi-bit.c \ + mpi-cmp.c \ + mpi-div.c \ + mpi-gcd.c \ + mpi-internal.h \ + mpi-inline.h \ + mpi-inline.c \ + mpi-inv.c \ + mpi-mul.c \ + mpi-mod.c \ + mpi-pow.c \ + mpi-mpow.c \ + mpi-scan.c \ + mpicoder.c \ + mpih-div.c \ + mpih-mul.c \ + mpih-const-time.c \ + mpiutil.c \ + ec.c ec-internal.h ec-ed25519.c +EXTRA_libmpi_la_SOURCES = asm-common-aarch64.h diff --git 
a/comm/third_party/libgcrypt/mpi/Makefile.in b/comm/third_party/libgcrypt/mpi/Makefile.in new file mode 100644 index 0000000000..156187ce75 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/Makefile.in @@ -0,0 +1,947 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright (C) 1992, 1999, 2000, 2002 Free Software Foundation, Inc. +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +# 1.5 leads to a combinatorial explosion due to all the conditionals +# I was not able to build it with 64Megs - 1.6 fixes this. 
+# not anymore required: AUTOMAKE_OPTIONS = 1.6 + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = 
: +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = mpi +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cc_for_build.m4 \ + $(top_srcdir)/m4/gpg-error.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/noexecstack.m4 $(top_srcdir)/m4/socklen.m4 \ + $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libmpi_la_LIBADD = +am_libmpi_la_OBJECTS = mpi-add.lo mpi-bit.lo mpi-cmp.lo mpi-div.lo \ + mpi-gcd.lo mpi-inline.lo mpi-inv.lo mpi-mul.lo mpi-mod.lo \ + mpi-pow.lo mpi-mpow.lo mpi-scan.lo mpicoder.lo mpih-div.lo \ + mpih-mul.lo mpih-const-time.lo mpiutil.lo ec.lo ec-ed25519.lo +@MPI_MOD_ASM_MPIH_ADD1_FALSE@@MPI_MOD_C_MPIH_ADD1_TRUE@am__objects_1 = mpih-add1.lo +@MPI_MOD_ASM_MPIH_ADD1_TRUE@am__objects_1 = mpih-add1-asm.lo +@MPI_MOD_ASM_MPIH_SUB1_FALSE@@MPI_MOD_C_MPIH_SUB1_TRUE@am__objects_2 = mpih-sub1.lo +@MPI_MOD_ASM_MPIH_SUB1_TRUE@am__objects_2 = mpih-sub1-asm.lo +@MPI_MOD_ASM_MPIH_MUL1_FALSE@@MPI_MOD_C_MPIH_MUL1_TRUE@am__objects_3 = mpih-mul1.lo +@MPI_MOD_ASM_MPIH_MUL1_TRUE@am__objects_3 = mpih-mul1-asm.lo +@MPI_MOD_ASM_MPIH_MUL2_FALSE@@MPI_MOD_C_MPIH_MUL2_TRUE@am__objects_4 = mpih-mul2.lo +@MPI_MOD_ASM_MPIH_MUL2_TRUE@am__objects_4 = mpih-mul2-asm.lo +@MPI_MOD_ASM_MPIH_MUL3_FALSE@@MPI_MOD_C_MPIH_MUL3_TRUE@am__objects_5 = mpih-mul3.lo +@MPI_MOD_ASM_MPIH_MUL3_TRUE@am__objects_5 = mpih-mul3-asm.lo +@MPI_MOD_ASM_MPIH_LSHIFT_FALSE@@MPI_MOD_C_MPIH_LSHIFT_TRUE@am__objects_6 = mpih-lshift.lo 
+@MPI_MOD_ASM_MPIH_LSHIFT_TRUE@am__objects_6 = mpih-lshift-asm.lo +@MPI_MOD_ASM_MPIH_RSHIFT_FALSE@@MPI_MOD_C_MPIH_RSHIFT_TRUE@am__objects_7 = mpih-rshift.lo +@MPI_MOD_ASM_MPIH_RSHIFT_TRUE@am__objects_7 = mpih-rshift-asm.lo +@MPI_MOD_ASM_UDIV_FALSE@@MPI_MOD_C_UDIV_TRUE@am__objects_8 = udiv.lo +@MPI_MOD_ASM_UDIV_TRUE@am__objects_8 = udiv-asm.lo +@MPI_MOD_ASM_UDIV_QRNND_FALSE@@MPI_MOD_C_UDIV_QRNND_TRUE@am__objects_9 = udiv-qrnnd.lo +@MPI_MOD_ASM_UDIV_QRNND_TRUE@am__objects_9 = udiv-qrnnd-asm.lo +nodist_libmpi_la_OBJECTS = $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) $(am__objects_4) $(am__objects_5) \ + $(am__objects_6) $(am__objects_7) $(am__objects_8) \ + $(am__objects_9) +libmpi_la_OBJECTS = $(am_libmpi_la_OBJECTS) \ + $(nodist_libmpi_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libmpi_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libmpi_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/ec-ed25519.Plo ./$(DEPDIR)/ec.Plo \ + ./$(DEPDIR)/mpi-add.Plo ./$(DEPDIR)/mpi-bit.Plo \ + ./$(DEPDIR)/mpi-cmp.Plo ./$(DEPDIR)/mpi-div.Plo \ + ./$(DEPDIR)/mpi-gcd.Plo ./$(DEPDIR)/mpi-inline.Plo \ + ./$(DEPDIR)/mpi-inv.Plo ./$(DEPDIR)/mpi-mod.Plo \ + ./$(DEPDIR)/mpi-mpow.Plo ./$(DEPDIR)/mpi-mul.Plo \ + ./$(DEPDIR)/mpi-pow.Plo ./$(DEPDIR)/mpi-scan.Plo \ + ./$(DEPDIR)/mpicoder.Plo ./$(DEPDIR)/mpih-add1-asm.Plo \ + ./$(DEPDIR)/mpih-add1.Plo 
./$(DEPDIR)/mpih-const-time.Plo \ + ./$(DEPDIR)/mpih-div.Plo ./$(DEPDIR)/mpih-lshift-asm.Plo \ + ./$(DEPDIR)/mpih-lshift.Plo ./$(DEPDIR)/mpih-mul.Plo \ + ./$(DEPDIR)/mpih-mul1-asm.Plo ./$(DEPDIR)/mpih-mul1.Plo \ + ./$(DEPDIR)/mpih-mul2-asm.Plo ./$(DEPDIR)/mpih-mul2.Plo \ + ./$(DEPDIR)/mpih-mul3-asm.Plo ./$(DEPDIR)/mpih-mul3.Plo \ + ./$(DEPDIR)/mpih-rshift-asm.Plo ./$(DEPDIR)/mpih-rshift.Plo \ + ./$(DEPDIR)/mpih-sub1-asm.Plo ./$(DEPDIR)/mpih-sub1.Plo \ + ./$(DEPDIR)/mpiutil.Plo ./$(DEPDIR)/udiv-asm.Plo \ + ./$(DEPDIR)/udiv-qrnnd-asm.Plo ./$(DEPDIR)/udiv-qrnnd.Plo \ + ./$(DEPDIR)/udiv.Plo +am__mv = mv -f +CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) +LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CCASFLAGS) $(CCASFLAGS) +AM_V_CPPAS = $(am__v_CPPAS_@AM_V@) +am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@) +am__v_CPPAS_0 = @echo " CPPAS " $@; +am__v_CPPAS_1 = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libmpi_la_SOURCES) $(EXTRA_libmpi_la_SOURCES) \ + $(nodist_libmpi_la_SOURCES) +DIST_SOURCES = $(libmpi_la_SOURCES) $(EXTRA_libmpi_la_SOURCES) +am__can_run_installinfo = \ + 
case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_FILEVERSION = @BUILD_FILEVERSION@ +BUILD_REVISION = @BUILD_REVISION@ +BUILD_TIMESTAMP = @BUILD_TIMESTAMP@ +BUILD_VERSION = @BUILD_VERSION@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CC_FOR_BUILD = @CC_FOR_BUILD@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@ +FALLBACK_SOCKLEN_T = @FALLBACK_SOCKLEN_T@ +FGREP = @FGREP@ +GCRYPT_CIPHERS = @GCRYPT_CIPHERS@ +GCRYPT_DIGESTS = @GCRYPT_DIGESTS@ +GCRYPT_HWF_MODULES = 
@GCRYPT_HWF_MODULES@ +GCRYPT_KDFS = @GCRYPT_KDFS@ +GCRYPT_PUBKEY_CIPHERS = @GCRYPT_PUBKEY_CIPHERS@ +GCRYPT_RANDOM = @GCRYPT_RANDOM@ +GPGRT_CONFIG = @GPGRT_CONFIG@ +GPG_ERROR_CFLAGS = @GPG_ERROR_CFLAGS@ +GPG_ERROR_CONFIG = @GPG_ERROR_CONFIG@ +GPG_ERROR_LIBS = @GPG_ERROR_LIBS@ +GPG_ERROR_MT_CFLAGS = @GPG_ERROR_MT_CFLAGS@ +GPG_ERROR_MT_LIBS = @GPG_ERROR_MT_LIBS@ +GREP = @GREP@ +INSERT_SYS_SELECT_H = @INSERT_SYS_SELECT_H@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDADD_FOR_TESTS_KLUDGE = @LDADD_FOR_TESTS_KLUDGE@ +LDFLAGS = @LDFLAGS@ +LIBGCRYPT_CIPHERS = @LIBGCRYPT_CIPHERS@ +LIBGCRYPT_CONFIG_API_VERSION = @LIBGCRYPT_CONFIG_API_VERSION@ +LIBGCRYPT_CONFIG_CFLAGS = @LIBGCRYPT_CONFIG_CFLAGS@ +LIBGCRYPT_CONFIG_HOST = @LIBGCRYPT_CONFIG_HOST@ +LIBGCRYPT_CONFIG_LIBS = @LIBGCRYPT_CONFIG_LIBS@ +LIBGCRYPT_DIGESTS = @LIBGCRYPT_DIGESTS@ +LIBGCRYPT_LT_AGE = @LIBGCRYPT_LT_AGE@ +LIBGCRYPT_LT_CURRENT = @LIBGCRYPT_LT_CURRENT@ +LIBGCRYPT_LT_REVISION = @LIBGCRYPT_LT_REVISION@ +LIBGCRYPT_PUBKEY_CIPHERS = @LIBGCRYPT_PUBKEY_CIPHERS@ +LIBGCRYPT_THREAD_MODULES = @LIBGCRYPT_THREAD_MODULES@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPI_SFLAGS = @MPI_SFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOEXECSTACK_FLAGS = @NOEXECSTACK_FLAGS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PTH_CFLAGS = @PTH_CFLAGS@ +PTH_CONFIG = @PTH_CONFIG@ +PTH_LIBS = @PTH_LIBS@ +RANLIB = @RANLIB@ +RC = @RC@ +RUN_LARGE_DATA_TESTS = 
@RUN_LARGE_DATA_TESTS@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +SYSROOT = @SYSROOT@ +VERSION = @VERSION@ +VERSION_NUMBER = @VERSION_NUMBER@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +emacs_local_vars_begin = @emacs_local_vars_begin@ +emacs_local_vars_end = @emacs_local_vars_end@ +emacs_local_vars_read_only = @emacs_local_vars_read_only@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# Need to include ../src in addition to top_srcdir because gcrypt.h is +# a built header. 
+AM_CPPFLAGS = -I../src -I$(top_srcdir)/src +AM_CFLAGS = $(GPG_ERROR_CFLAGS) +AM_ASFLAGS = $(MPI_SFLAGS) +AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) +EXTRA_DIST = config.links +DISTCLEANFILES = mpi-asm-defs.h \ + mpih-add1-asm.S mpih-mul1-asm.S mpih-mul2-asm.S mpih-mul3-asm.S \ + mpih-lshift-asm.S mpih-rshift-asm.S mpih-sub1-asm.S asm-syntax.h \ + mpih-add1.c mpih-mul1.c mpih-mul2.c mpih-mul3.c \ + mpih-lshift.c mpih-rshift.c mpih-sub1.c \ + sysdep.h mod-source-info.h + +@MPI_MOD_ASM_MPIH_ADD1_FALSE@@MPI_MOD_C_MPIH_ADD1_FALSE@mpih_add1 = +@MPI_MOD_ASM_MPIH_ADD1_FALSE@@MPI_MOD_C_MPIH_ADD1_TRUE@mpih_add1 = mpih-add1.c + +# Beware: The following list is not a comment but grepped by +# config.links to get the list of symlinked modules +# Optional modules are marked with an O in the second column. +#BEGIN_ASM_LIST +# mpih-add1 C +# mpih-sub1 C +# mpih-mul1 C +# mpih-mul2 C +# mpih-mul3 C +# mpih-lshift C +# mpih-rshift C +# udiv O +# udiv-qrnnd O +#END_ASM_LIST + +# Note: This function has not yet been implemented. There is only a dummy in +# generic/ +# udiv-w-sdiv O + +# And we need to have conditionals for all modules because +# we don't know whether they are .c or .S. Very ugly; I know. 
+# Remember to define them all in configure.ac +@MPI_MOD_ASM_MPIH_ADD1_TRUE@mpih_add1 = mpih-add1-asm.S +@MPI_MOD_ASM_MPIH_SUB1_FALSE@@MPI_MOD_C_MPIH_SUB1_FALSE@mpih_sub1 = +@MPI_MOD_ASM_MPIH_SUB1_FALSE@@MPI_MOD_C_MPIH_SUB1_TRUE@mpih_sub1 = mpih-sub1.c +@MPI_MOD_ASM_MPIH_SUB1_TRUE@mpih_sub1 = mpih-sub1-asm.S +@MPI_MOD_ASM_MPIH_MUL1_FALSE@@MPI_MOD_C_MPIH_MUL1_FALSE@mpih_mul1 = +@MPI_MOD_ASM_MPIH_MUL1_FALSE@@MPI_MOD_C_MPIH_MUL1_TRUE@mpih_mul1 = mpih-mul1.c +@MPI_MOD_ASM_MPIH_MUL1_TRUE@mpih_mul1 = mpih-mul1-asm.S +@MPI_MOD_ASM_MPIH_MUL2_FALSE@@MPI_MOD_C_MPIH_MUL2_FALSE@mpih_mul2 = +@MPI_MOD_ASM_MPIH_MUL2_FALSE@@MPI_MOD_C_MPIH_MUL2_TRUE@mpih_mul2 = mpih-mul2.c +@MPI_MOD_ASM_MPIH_MUL2_TRUE@mpih_mul2 = mpih-mul2-asm.S +@MPI_MOD_ASM_MPIH_MUL3_FALSE@@MPI_MOD_C_MPIH_MUL3_FALSE@mpih_mul3 = +@MPI_MOD_ASM_MPIH_MUL3_FALSE@@MPI_MOD_C_MPIH_MUL3_TRUE@mpih_mul3 = mpih-mul3.c +@MPI_MOD_ASM_MPIH_MUL3_TRUE@mpih_mul3 = mpih-mul3-asm.S +@MPI_MOD_ASM_MPIH_LSHIFT_FALSE@@MPI_MOD_C_MPIH_LSHIFT_FALSE@mpih_lshift = +@MPI_MOD_ASM_MPIH_LSHIFT_FALSE@@MPI_MOD_C_MPIH_LSHIFT_TRUE@mpih_lshift = mpih-lshift.c +@MPI_MOD_ASM_MPIH_LSHIFT_TRUE@mpih_lshift = mpih-lshift-asm.S +@MPI_MOD_ASM_MPIH_RSHIFT_FALSE@@MPI_MOD_C_MPIH_RSHIFT_FALSE@mpih_rshift = +@MPI_MOD_ASM_MPIH_RSHIFT_FALSE@@MPI_MOD_C_MPIH_RSHIFT_TRUE@mpih_rshift = mpih-rshift.c +@MPI_MOD_ASM_MPIH_RSHIFT_TRUE@mpih_rshift = mpih-rshift-asm.S +@MPI_MOD_ASM_UDIV_FALSE@@MPI_MOD_C_UDIV_FALSE@udiv = +@MPI_MOD_ASM_UDIV_FALSE@@MPI_MOD_C_UDIV_TRUE@udiv = udiv.c +@MPI_MOD_ASM_UDIV_TRUE@udiv = udiv-asm.S +@MPI_MOD_ASM_UDIV_QRNND_FALSE@@MPI_MOD_C_UDIV_QRNND_FALSE@udiv_qrnnd = +@MPI_MOD_ASM_UDIV_QRNND_FALSE@@MPI_MOD_C_UDIV_QRNND_TRUE@udiv_qrnnd = udiv-qrnnd.c +@MPI_MOD_ASM_UDIV_QRNND_TRUE@udiv_qrnnd = udiv-qrnnd-asm.S +noinst_LTLIBRARIES = libmpi.la +libmpi_la_LDFLAGS = +nodist_libmpi_la_SOURCES = $(mpih_add1) $(mpih_sub1) $(mpih_mul1) \ + $(mpih_mul2) $(mpih_mul3) $(mpih_lshift) $(mpih_rshift) \ + $(udiv) $(udiv_qrnnd) + +libmpi_la_SOURCES = longlong.h \ + 
mpi-add.c \ + mpi-bit.c \ + mpi-cmp.c \ + mpi-div.c \ + mpi-gcd.c \ + mpi-internal.h \ + mpi-inline.h \ + mpi-inline.c \ + mpi-inv.c \ + mpi-mul.c \ + mpi-mod.c \ + mpi-pow.c \ + mpi-mpow.c \ + mpi-scan.c \ + mpicoder.c \ + mpih-div.c \ + mpih-mul.c \ + mpih-const-time.c \ + mpiutil.c \ + ec.c ec-internal.h ec-ed25519.c + +EXTRA_libmpi_la_SOURCES = asm-common-aarch64.h +all: all-am + +.SUFFIXES: +.SUFFIXES: .S .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu mpi/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu mpi/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f 
$${locs}; \ + } + +libmpi.la: $(libmpi_la_OBJECTS) $(libmpi_la_DEPENDENCIES) $(EXTRA_libmpi_la_DEPENDENCIES) + $(AM_V_CCLD)$(libmpi_la_LINK) $(libmpi_la_OBJECTS) $(libmpi_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ec-ed25519.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ec.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-add.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-bit.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-cmp.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-div.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-gcd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-inline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-inv.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-mod.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-mpow.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-mul.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-pow.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpi-scan.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpicoder.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-add1-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-add1.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-const-time.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-div.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-lshift-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-lshift.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul1-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul1.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul2-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul3-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-mul3.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-rshift-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-rshift.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-sub1-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpih-sub1.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mpiutil.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/udiv-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/udiv-qrnnd-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/udiv-qrnnd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/udiv.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.S.o: 
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $< + +.S.obj: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.S.lo: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $< + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + 
echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/ec-ed25519.Plo + -rm -f ./$(DEPDIR)/ec.Plo + -rm -f ./$(DEPDIR)/mpi-add.Plo + -rm -f ./$(DEPDIR)/mpi-bit.Plo + -rm -f ./$(DEPDIR)/mpi-cmp.Plo + -rm -f ./$(DEPDIR)/mpi-div.Plo + -rm -f ./$(DEPDIR)/mpi-gcd.Plo + -rm -f ./$(DEPDIR)/mpi-inline.Plo + -rm -f ./$(DEPDIR)/mpi-inv.Plo + -rm -f ./$(DEPDIR)/mpi-mod.Plo + -rm -f ./$(DEPDIR)/mpi-mpow.Plo + -rm -f ./$(DEPDIR)/mpi-mul.Plo + -rm -f ./$(DEPDIR)/mpi-pow.Plo + -rm -f ./$(DEPDIR)/mpi-scan.Plo + -rm -f ./$(DEPDIR)/mpicoder.Plo + -rm -f ./$(DEPDIR)/mpih-add1-asm.Plo + -rm -f ./$(DEPDIR)/mpih-add1.Plo + -rm -f ./$(DEPDIR)/mpih-const-time.Plo + -rm -f ./$(DEPDIR)/mpih-div.Plo + -rm -f ./$(DEPDIR)/mpih-lshift-asm.Plo + -rm -f ./$(DEPDIR)/mpih-lshift.Plo + -rm -f ./$(DEPDIR)/mpih-mul.Plo + -rm -f ./$(DEPDIR)/mpih-mul1-asm.Plo + -rm -f ./$(DEPDIR)/mpih-mul1.Plo + -rm -f ./$(DEPDIR)/mpih-mul2-asm.Plo + -rm -f ./$(DEPDIR)/mpih-mul2.Plo + -rm -f ./$(DEPDIR)/mpih-mul3-asm.Plo + -rm -f ./$(DEPDIR)/mpih-mul3.Plo + -rm -f ./$(DEPDIR)/mpih-rshift-asm.Plo + -rm -f ./$(DEPDIR)/mpih-rshift.Plo + -rm -f ./$(DEPDIR)/mpih-sub1-asm.Plo + -rm -f ./$(DEPDIR)/mpih-sub1.Plo + -rm -f ./$(DEPDIR)/mpiutil.Plo + -rm -f ./$(DEPDIR)/udiv-asm.Plo + -rm -f ./$(DEPDIR)/udiv-qrnnd-asm.Plo + -rm -f ./$(DEPDIR)/udiv-qrnnd.Plo + -rm -f ./$(DEPDIR)/udiv.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f 
./$(DEPDIR)/ec-ed25519.Plo + -rm -f ./$(DEPDIR)/ec.Plo + -rm -f ./$(DEPDIR)/mpi-add.Plo + -rm -f ./$(DEPDIR)/mpi-bit.Plo + -rm -f ./$(DEPDIR)/mpi-cmp.Plo + -rm -f ./$(DEPDIR)/mpi-div.Plo + -rm -f ./$(DEPDIR)/mpi-gcd.Plo + -rm -f ./$(DEPDIR)/mpi-inline.Plo + -rm -f ./$(DEPDIR)/mpi-inv.Plo + -rm -f ./$(DEPDIR)/mpi-mod.Plo + -rm -f ./$(DEPDIR)/mpi-mpow.Plo + -rm -f ./$(DEPDIR)/mpi-mul.Plo + -rm -f ./$(DEPDIR)/mpi-pow.Plo + -rm -f ./$(DEPDIR)/mpi-scan.Plo + -rm -f ./$(DEPDIR)/mpicoder.Plo + -rm -f ./$(DEPDIR)/mpih-add1-asm.Plo + -rm -f ./$(DEPDIR)/mpih-add1.Plo + -rm -f ./$(DEPDIR)/mpih-const-time.Plo + -rm -f ./$(DEPDIR)/mpih-div.Plo + -rm -f ./$(DEPDIR)/mpih-lshift-asm.Plo + -rm -f ./$(DEPDIR)/mpih-lshift.Plo + -rm -f ./$(DEPDIR)/mpih-mul.Plo + -rm -f ./$(DEPDIR)/mpih-mul1-asm.Plo + -rm -f ./$(DEPDIR)/mpih-mul1.Plo + -rm -f ./$(DEPDIR)/mpih-mul2-asm.Plo + -rm -f ./$(DEPDIR)/mpih-mul2.Plo + -rm -f ./$(DEPDIR)/mpih-mul3-asm.Plo + -rm -f ./$(DEPDIR)/mpih-mul3.Plo + -rm -f ./$(DEPDIR)/mpih-rshift-asm.Plo + -rm -f ./$(DEPDIR)/mpih-rshift.Plo + -rm -f ./$(DEPDIR)/mpih-sub1-asm.Plo + -rm -f ./$(DEPDIR)/mpih-sub1.Plo + -rm -f ./$(DEPDIR)/mpiutil.Plo + -rm -f ./$(DEPDIR)/udiv-asm.Plo + -rm -f ./$(DEPDIR)/udiv-qrnnd-asm.Plo + -rm -f ./$(DEPDIR)/udiv-qrnnd.Plo + -rm -f ./$(DEPDIR)/udiv.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am 
install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/comm/third_party/libgcrypt/mpi/aarch64/distfiles b/comm/third_party/libgcrypt/mpi/aarch64/distfiles new file mode 100644 index 0000000000..1327bd4a71 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/distfiles @@ -0,0 +1,6 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-sub1.S +mpi-asm-defs.h diff --git a/comm/third_party/libgcrypt/mpi/aarch64/mpi-asm-defs.h b/comm/third_party/libgcrypt/mpi/aarch64/mpi-asm-defs.h new file mode 100644 index 0000000000..65190653bc --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/mpi-asm-defs.h @@ -0,0 +1,4 @@ +/* This file defines some basic constants for the MPI machinery. We + * need to define the types on a per-CPU basis, so it is done with + * this file here. */ +#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG_LONG) diff --git a/comm/third_party/libgcrypt/mpi/aarch64/mpih-add1.S b/comm/third_party/libgcrypt/mpi/aarch64/mpih-add1.S new file mode 100644 index 0000000000..bc62cf9873 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/mpih-add1.S @@ -0,0 +1,74 @@ +/* ARM64 add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "sysdep.h" +#include "asm-syntax.h" +#include "asm-common-aarch64.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, x0 + * mpi_ptr_t s1_ptr, x1 + * mpi_ptr_t s2_ptr, x2 + * mpi_size_t size) w3 + */ + +.text + +.globl _gcry_mpih_add_n +ELF(.type _gcry_mpih_add_n,%function) +_gcry_mpih_add_n: /* res[i] = s1[i] + s2[i] for i < size; the final carry is returned in x0 */ + CFI_STARTPROC() + and w5, w3, #3; /* w5 = size mod 4 */ + adds xzr, xzr, xzr; /* clear carry flag */ + + cbz w5, .Large_loop; /* size is a multiple of 4: skip the single-limb loop */ + +.Loop: /* one limb per iteration until the remaining count is a multiple of 4 */ + ldr x4, [x1], #8; + sub w3, w3, #1; + ldr x11, [x2], #8; + and w5, w3, #3; /* w5 = remaining count mod 4 */ + adcs x4, x4, x11; /* add with carry-in, produce carry-out for the next limb */ + str x4, [x0], #8; + cbz w3, .Lend; /* no limbs left */ + cbnz w5, .Loop; + +.Large_loop: /* four limbs per iteration, carry chained through the adcs sequence */ + ldp x4, x6, [x1], #16; + ldp x5, x7, [x2], #16; + ldp x8, x10, [x1], #16; + ldp x9, x11, [x2], #16; + sub w3, w3, #4; + adcs x4, x4, x5; + adcs x6, x6, x7; + adcs x8, x8, x9; + adcs x10, x10, x11; + stp x4, x6, [x0], #16; + stp x8, x10, [x0], #16; + cbnz w3, .Large_loop; + +.Lend: + adc x0, xzr, xzr; /* x0 = 0 + 0 + carry, i.e. the carry out of the last limb */ + ret; + CFI_ENDPROC() +ELF(.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;) diff --git a/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul1.S new file mode 100644 index 0000000000..92fcd141b1 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul1.S @@ -0,0 +1,99 @@ +/* ARM64 mul_1 -- Multiply a limb vector with a limb and
store the result in + * a second limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "sysdep.h" +#include "asm-syntax.h" +#include "asm-common-aarch64.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, x0 + * mpi_ptr_t s1_ptr, x1 + * mpi_size_t s1_size, w2 + * mpi_limb_t s2_limb) x3 + */ + +.text + +.globl _gcry_mpih_mul_1 +ELF(.type _gcry_mpih_mul_1,%function) +_gcry_mpih_mul_1: + CFI_STARTPROC() + and w5, w2, #3; + mov x4, xzr; + + cbz w5, .Large_loop; + +.Loop: + ldr x5, [x1], #8; + sub w2, w2, #1; + mul x9, x5, x3; + umulh x10, x5, x3; + and w5, w2, #3; + adds x4, x4, x9; + str x4, [x0], #8; + adc x4, x10, xzr; + + cbz w2, .Lend; + cbnz w5, .Loop; + +.Large_loop: + ldp x5, x6, [x1]; + sub w2, w2, #4; + + mul x9, x5, x3; + ldp x7, x8, [x1, #16]; + umulh x10, x5, x3; + add x1, x1, #32; + + adds x4, x4, x9; + str x4, [x0], #8; + mul x11, x6, x3; + adc x4, x10, xzr; + + umulh x12, x6, x3; + + adds x4, x4, x11; + str x4, [x0], #8; + mul x13, x7, x3; + adc x4, x12, xzr; + + umulh x14, x7, x3; + + adds x4, x4, x13; + str x4, [x0], #8; + mul x15, x8, x3; + adc x4, x14, xzr; + + umulh x16, x8, x3; + + adds x4, x4, x15; + str x4, [x0], #8; + adc x4, x16, xzr; + + cbnz w2, .Large_loop; + 
+.Lend: + mov x0, x4; + ret; + CFI_ENDPROC() +ELF(.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;) diff --git a/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul2.S new file mode 100644 index 0000000000..aa0e5a2d5c --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul2.S @@ -0,0 +1,111 @@ +/* ARM64 mul_2 -- Multiply a limb vector with a limb and add the result to + * a second limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#include "sysdep.h" +#include "asm-syntax.h" +#include "asm-common-aarch64.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, x0 + * mpi_ptr_t s1_ptr, x1 + * mpi_size_t s1_size, w2 + * mpi_limb_t s2_limb) x3 + */ + +.text + +.globl _gcry_mpih_addmul_1 +ELF(.type _gcry_mpih_addmul_1,%function) +_gcry_mpih_addmul_1: /* res[i] += s1[i] * s2_limb for i < s1_size; the final carry limb is returned in x0 */ + CFI_STARTPROC() + and w5, w2, #3; /* w5 = s1_size mod 4 */ + mov x6, xzr; /* x6 = carry limb propagated from one limb to the next */ + mov x7, xzr; /* x7 = 0, only ever used as the zero operand of adc */ + + cbz w5, .Large_loop; /* s1_size is a multiple of 4: skip the single-limb loop */ + +.Loop: /* one limb per iteration until the remaining count is a multiple of 4 */ + ldr x5, [x1], #8; + + mul x12, x5, x3; /* low half of s1 limb * s2_limb */ + ldr x4, [x0]; + umulh x13, x5, x3; /* high half of s1 limb * s2_limb */ + sub w2, w2, #1; + + adds x12, x12, x4; /* low product + current res limb */ + and w5, w2, #3; + adc x13, x13, x7; + adds x12, x12, x6; /* + carry limb from the previous limb */ + str x12, [x0], #8; + adc x6, x7, x13; /* new carry limb */ + + cbz w2, .Lend; + cbnz w5, .Loop; + +.Large_loop: /* four limbs per iteration, same steps as .Loop interleaved */ + ldp x5, x9, [x1], #16; + sub w2, w2, #4; + ldp x4, x8, [x0]; + + mul x12, x5, x3; + umulh x13, x5, x3; + + adds x12, x12, x4; + mul x14, x9, x3; + adc x13, x13, x7; + adds x12, x12, x6; + umulh x15, x9, x3; + str x12, [x0], #8; + adc x6, x7, x13; + + adds x14, x14, x8; + ldp x5, x9, [x1], #16; + adc x15, x15, x7; + adds x14, x14, x6; + mul x12, x5, x3; + str x14, [x0], #8; + ldp x4, x8, [x0]; + umulh x13, x5, x3; + adc x6, x7, x15; + + adds x12, x12, x4; + mul x14, x9, x3; + adc x13, x13, x7; + adds x12, x12, x6; + umulh x15, x9, x3; + str x12, [x0], #8; + adc x6, x7, x13; + + adds x14, x14, x8; + adc x15, x15, x7; + adds x14, x14, x6; + str x14, [x0], #8; + adc x6, x7, x15; + + cbnz w2, .Large_loop; + +.Lend: + mov x0, x6; /* return the final carry limb */ + ret; + CFI_ENDPROC() +ELF(.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;) diff --git a/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul3.S new file mode 100644 index 0000000000..5a40b354c2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/mpih-mul3.S @@ -0,0 +1,124 @@ +/* ARM64 mul_3 -- Multiply a limb vector with a limb and subtract the result + * from a second limb vector.
+ * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "sysdep.h" +#include "asm-syntax.h" +#include "asm-common-aarch64.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, x0 + * mpi_ptr_t s1_ptr, x1 + * mpi_size_t s1_size, w2 + * mpi_limb_t s2_limb) x3 + */ + +.text + +.globl _gcry_mpih_submul_1 +ELF(.type _gcry_mpih_submul_1,%function) +_gcry_mpih_submul_1: + CFI_STARTPROC() + and w5, w2, #3; + mov x7, xzr; + cbz w5, .Large_loop; + + subs xzr, xzr, xzr; + +.Loop: + ldr x4, [x1], #8; + cinc x7, x7, cc; + ldr x5, [x0]; + sub w2, w2, #1; + + mul x6, x4, x3; + subs x5, x5, x7; + umulh x4, x4, x3; + and w10, w2, #3; + + cset x7, cc; + subs x5, x5, x6; + add x7, x7, x4; + str x5, [x0], #8; + + cbz w2, .Loop_end; + cbnz w10, .Loop; + + cinc x7, x7, cc; + +.Large_loop: + ldp x4, x8, [x1], #16; + sub w2, w2, #4; + ldp x5, x9, [x0]; + + mul x6, x4, x3; + subs x5, x5, x7; + umulh x4, x4, x3; + + cset x7, cc; + subs x5, x5, x6; + mul x6, x8, x3; + add x7, x7, x4; + str x5, [x0], #8; + cinc x7, x7, cc; + + umulh x8, x8, x3; + + subs x9, x9, x7; + cset x7, cc; + subs x9, x9, x6; + ldp x4, x10, [x1], #16; + str x9, [x0], #8; + add x7, x7, x8; + ldp x5, x9, [x0]; + cinc x7, x7, cc; + + mul x6, x4, x3; + subs 
x5, x5, x7; + umulh x4, x4, x3; + + cset x7, cc; + subs x5, x5, x6; + mul x6, x10, x3; + add x7, x7, x4; + str x5, [x0], #8; + cinc x7, x7, cc; + + umulh x10, x10, x3; + + subs x9, x9, x7; + cset x7, cc; + subs x9, x9, x6; + add x7, x7, x10; + str x9, [x0], #8; + cinc x7, x7, cc; + + cbnz w2, .Large_loop; + + mov x0, x7; + ret; + +.Loop_end: + cinc x0, x7, cc; + ret; + CFI_ENDPROC() +ELF(.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;) diff --git a/comm/third_party/libgcrypt/mpi/aarch64/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/aarch64/mpih-sub1.S new file mode 100644 index 0000000000..4f279a1230 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/aarch64/mpih-sub1.S @@ -0,0 +1,74 @@ +/* ARM64 sub_n -- Subtract two limb vectors of the same length > 0 and store + * difference in a third limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ * + */ + +#include "sysdep.h" +#include "asm-syntax.h" +#include "asm-common-aarch64.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, x0 + * mpi_ptr_t s1_ptr, x1 + * mpi_ptr_t s2_ptr, x2 + * mpi_size_t size) w3 + */ + +.text + +.globl _gcry_mpih_sub_n +ELF(.type _gcry_mpih_sub_n,%function) +_gcry_mpih_sub_n: + CFI_STARTPROC() + and w5, w3, #3; + subs xzr, xzr, xzr; /* prepare carry flag for sub */ + + cbz w5, .Large_loop; + +.Loop: + ldr x4, [x1], #8; + sub w3, w3, #1; + ldr x11, [x2], #8; + and w5, w3, #3; + sbcs x4, x4, x11; + str x4, [x0], #8; + cbz w3, .Lend; + cbnz w5, .Loop; + +.Large_loop: + ldp x4, x6, [x1], #16; + ldp x5, x7, [x2], #16; + ldp x8, x10, [x1], #16; + ldp x9, x11, [x2], #16; + sub w3, w3, #4; + sbcs x4, x4, x5; + sbcs x6, x6, x7; + sbcs x8, x8, x9; + sbcs x10, x10, x11; + stp x4, x6, [x0], #16; + stp x8, x10, [x0], #16; + cbnz w3, .Large_loop; + +.Lend: + cset x0, cc; + ret; + CFI_ENDPROC() +ELF(.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;) diff --git a/comm/third_party/libgcrypt/mpi/alpha/README b/comm/third_party/libgcrypt/mpi/alpha/README new file mode 100644 index 0000000000..00addfd396 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/README @@ -0,0 +1,53 @@ +This directory contains mpn functions optimized for DEC Alpha processors. + +RELEVANT OPTIMIZATION ISSUES + +EV4 + +1. This chip has very limited store bandwidth. The on-chip L1 cache is +write-through, and a cache line is transferred from the store buffer to the +off-chip L2 in as much as 15 cycles on most systems. This delay hurts +mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift. + +2. Pairing is possible between memory instructions and integer arithmetic +instructions. + +3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of +these cycles are pipelined. Thus, multiply instructions can be issued at a +rate of one each 21st cycle. + +EV5 + +1. The memory bandwidth of this chip seems excellent, both for loads and +stores.
Even when the working set is larger than the on-chip L1 and L2 +caches, the performance remains almost unaffected. + +2. mulq has a measured latency of 13 cycles and an issue rate of 1 each 8th +cycle. umulh has a measured latency of 15 cycles and an issue rate of 1 +each 10th cycle. But the exact timing is somewhat confusing. + +3. mpn_add_n. With 4-fold unrolling, we need 37 instructions, whereof 12 + are memory operations. This will take at least + ceil(37/2) [dual issue] + 1 [taken branch] = 20 cycles + We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data + cache cycles, which should be completely hidden in the 20 issue cycles. + The computation is inherently serial, with these dependencies: + addq + / \ + addq cmpult + | | + cmpult | + \ / + or + I.e., there is a 4 cycle path for each limb, making 16 cycles the absolute + minimum. We could replace the `or' with a cmoveq/cmovne, which would save + a cycle on EV5, but that might waste a cycle on EV4. Also, cmov takes 2 + cycles. + addq + / \ + addq cmpult + | \ + cmpult -> cmovne + +STATUS + diff --git a/comm/third_party/libgcrypt/mpi/alpha/distfiles b/comm/third_party/libgcrypt/mpi/alpha/distfiles new file mode 100644 index 0000000000..f2ab9fc3c1 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/distfiles @@ -0,0 +1,11 @@ +README +mpih-add1.S +mpih-sub1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-lshift.S +mpih-rshift.S + +udiv-qrnnd.S + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-add1.S b/comm/third_party/libgcrypt/mpi/alpha/mpih-add1.S new file mode 100644 index 0000000000..50dbb2b9d2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-add1.S @@ -0,0 +1,124 @@ +/* alpha add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * Copyright (C) 1995, 1998, 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt.
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, ($16) + * mpi_ptr_t s1_ptr, ($17) + * mpi_ptr_t s2_ptr, ($18) + * mpi_size_t size) ($19) + */ + + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_add_n + .ent _gcry_mpih_add_n +_gcry_mpih_add_n: + .frame $30,0,$26,0 + + ldq $3,0($17) + ldq $4,0($18) + + subq $19,1,$19 + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 + beq $2,.L0 # if multiple of 4 limbs, skip first loop + + subq $19,$2,$19 + +.Loop0: subq $2,1,$2 + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend + + .align 3 +.Loop: subq $19,4,$19 + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + 
addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 + bne $19,.Loop + +.Lend: addq $4,$0,$4 + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + ret $31,($26),1 + + .end _gcry_mpih_add_n + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/alpha/mpih-lshift.S new file mode 100644 index 0000000000..ded4b15c00 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-lshift.S @@ -0,0 +1,122 @@ +/* alpha - left shift + * + * Copyright (C) 1994, 1995, 1998, 2001, + * 2002 Free Software Foundation, Inc. + * + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (r16) + * mpi_ptr_t up, (r17) + * mpi_size_t usize, (r18) + * unsigned cnt) (r19) + * + * This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, + * it would take 4 cycles/limb. It should be possible to get down to 3 + * cycles/limb since both ldq and stq can be paired with the other used + * instructions. But there are many restrictions in the 21064 pipeline that + * make it hard, if not impossible, to get down to 3 cycles/limb: + * + * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. + * 2. Only aligned instruction pairs can be paired. + * 3. The store buffer or silo might not be able to deal with the bandwidth. + */ + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_lshift + .ent _gcry_mpih_lshift +_gcry_mpih_lshift: + .frame $30,0,$26,0 + + s8addq $18,$17,$17 # make r17 point at end of s1 + ldq $4,-8($17) # load first limb + subq $17,8,$17 + subq $31,$19,$7 + s8addq $18,$16,$16 # make r16 point at end of RES + subq $18,1,$18 + and $18,4-1,$20 # number of limbs in first loop + srl $4,$7,$0 # compute function result + + beq $20,.L0 + subq $18,$20,$18 + + .align 3 +.Loop0: + ldq $3,-8($17) + subq $16,8,$16 + subq $17,8,$17 + subq $20,1,$20 + sll $4,$19,$5 + srl $3,$7,$6 + bis $3,$3,$4 + bis $5,$6,$8 + stq $8,0($16) + bne $20,.Loop0 + +.L0: beq $18,.Lend + + .align 3 +.Loop: ldq $3,-8($17) + subq $16,32,$16 + subq $18,4,$18 + sll $4,$19,$5 + srl $3,$7,$6 + + ldq $4,-16($17) + sll $3,$19,$1 + bis $5,$6,$8 + stq $8,24($16) + srl $4,$7,$2 + + ldq $3,-24($17) + sll $4,$19,$5 + bis $1,$2,$8 + stq $8,16($16) + srl $3,$7,$6 + + ldq $4,-32($17) + sll $3,$19,$1 + bis $5,$6,$8 + stq $8,8($16) + srl $4,$7,$2 + + subq $17,32,$17 + bis $1,$2,$8 + stq $8,0($16) + + bgt $18,.Loop + +.Lend: sll $4,$19,$8 + stq $8,-8($16) + ret $31,($26),1 + .end _gcry_mpih_lshift + + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-mul1.S
b/comm/third_party/libgcrypt/mpi/alpha/mpih-mul1.S new file mode 100644 index 0000000000..cd91b10499 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-mul1.S @@ -0,0 +1,90 @@ +/* Alpha 21064 mpih-mul1.S -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (r16) + * mpi_ptr_t s1_ptr, (r17) + * mpi_size_t s1_size, (r18) + * mpi_limb_t s2_limb) (r19) + * + * This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5. + * + * To improve performance for long multiplications, we would use + * 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use + * these instructions without slowing down the general code: 1. We can + * only have two prefetches in operation at any time in the Alpha + * architecture. 2. There will seldom be any special alignment + * between RES_PTR and S1_PTR. Maybe we can simply divide the current + * loop into an inner and outer loop, having the inner loop handle + * exactly one prefetch block? 
+ */ + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_mul_1 + .ent _gcry_mpih_mul_1 2 +_gcry_mpih_mul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + bic $31,$31,$4 # clear cy_limb + umulh $2,$19,$0 # $0 = prod_high + beq $18,Lend1 # jump if size was == 1 + ldq $2,8($17) # $2 = s1_limb + subq $18,1,$18 # size-- + stq $3,0($16) + beq $18,Lend2 # jump if size was == 2 + + .align 3 +Loop: mulq $2,$19,$3 # $3 = prod_low + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,16($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + stq $3,8($16) + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $16,8,$16 # res_ptr++ + bne $18,Loop + +Lend2: mulq $2,$19,$3 # $3 = prod_low + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + stq $3,8($16) + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +Lend1: stq $3,0($16) + ret $31,($26),1 + + .end _gcry_mpih_mul_1 + + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/alpha/mpih-mul2.S new file mode 100644 index 0000000000..5eb6b98be4 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-mul2.S @@ -0,0 +1,97 @@ +/* Alpha 21064 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (r16) + * mpi_ptr_t s1_ptr, (r17) + * mpi_size_t s1_size, (r18) + * mpi_limb_t s2_limb) (r19) + * + * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. + */ + + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_addmul_1 + .ent _gcry_mpih_addmul_1 2 +_gcry_mpih_addmul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + umulh $2,$19,$0 # $0 = prod_high + beq $18,.Lend1 # jump if size was == 1 + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + addq $5,$3,$3 + cmpult $3,$5,$4 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + beq $18,.Lend2 # jump if size was == 2 + + .align 3 +.Loop: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $5,$3,$3 + cmpult $3,$5,$5 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + addq $5,$0,$0 # combine carries + bne $18,.Loop + +.Lend2: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + 
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + addq $5,$3,$3 + cmpult $3,$5,$5 + stq $3,0($16) + addq $5,$0,$0 # combine carries + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +.Lend1: addq $5,$3,$3 + cmpult $3,$5,$5 + stq $3,0($16) + addq $0,$5,$0 + ret $31,($26),1 + + .end _gcry_mpih_addmul_1 + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/alpha/mpih-mul3.S new file mode 100644 index 0000000000..7d5d2afe41 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-mul3.S @@ -0,0 +1,95 @@ +/* Alpha 21064 submul_1 -- Multiply a limb vector with a limb and + * subtract the result from a second limb vector. + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (r16 ) + * mpi_ptr_t s1_ptr, (r17 ) + * mpi_size_t s1_size, (r18 ) + * mpi_limb_t s2_limb) (r19 ) + * + * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5. 
+ */ + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_submul_1 + .ent _gcry_mpih_submul_1 2 +_gcry_mpih_submul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + umulh $2,$19,$0 # $0 = prod_high + beq $18,.Lend1 # jump if size was == 1 + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + subq $5,$3,$3 + cmpult $5,$3,$4 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + beq $18,.Lend2 # jump if size was == 2 + + .align 3 +.Loop: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + subq $18,1,$18 # size-- + umulh $2,$19,$4 # $4 = cy_limb + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + subq $5,$3,$3 + cmpult $5,$3,$5 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + addq $5,$0,$0 # combine carries + bne $18,.Loop + +.Lend2: mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + addq $4,$0,$0 # cy_limb = cy_limb + 'cy' + umulh $2,$19,$4 # $4 = cy_limb + addq $3,$0,$3 # $3 = cy_limb + prod_low + cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low) + subq $5,$3,$3 + cmpult $5,$3,$5 + stq $3,0($16) + addq $5,$0,$0 # combine carries + addq $4,$0,$0 # cy_limb = prod_high + cy + ret $31,($26),1 +.Lend1: subq $5,$3,$3 + cmpult $5,$3,$5 + stq $3,0($16) + addq $0,$5,$0 + ret $31,($26),1 + + .end _gcry_mpih_submul_1 + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/alpha/mpih-rshift.S new file mode 100644 index 0000000000..f0c9814388 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-rshift.S @@ -0,0 +1,118 @@ +/* alpha rshift + * Copyright (C) 1994, 1995, 1998, 1999, + * 2000, 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (r16) + * mpi_ptr_t up, (r17) + * mpi_size_t usize, (r18) + * unsigned cnt) (r19) + * + * This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling, + * it would take 4 cycles/limb. It should be possible to get down to 3 + * cycles/limb since both ldq and stq can be paired with the other used + * instructions. But there are many restrictions in the 21064 pipeline that + * make it hard, if not impossible, to get down to 3 cycles/limb: + * + * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay. + * 2. Only aligned instruction pairs can be paired. + * 3. The store buffer or silo might not be able to deal with the bandwidth.
+ */ + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_rshift + .ent _gcry_mpih_rshift +_gcry_mpih_rshift: + .frame $30,0,$26,0 + + ldq $4,0($17) # load first limb + addq $17,8,$17 + subq $31,$19,$7 + subq $18,1,$18 + and $18,4-1,$20 # number of limbs in first loop + sll $4,$7,$0 # compute function result + + beq $20,.R0 + subq $18,$20,$18 + + .align 3 +.Roop0: + ldq $3,0($17) + addq $16,8,$16 + addq $17,8,$17 + subq $20,1,$20 + srl $4,$19,$5 + sll $3,$7,$6 + bis $3,$3,$4 + bis $5,$6,$8 + stq $8,-8($16) + bne $20,.Roop0 + +.R0: beq $18,.Rend + + .align 3 +.Roop: ldq $3,0($17) + addq $16,32,$16 + subq $18,4,$18 + srl $4,$19,$5 + sll $3,$7,$6 + + ldq $4,8($17) + srl $3,$19,$1 + bis $5,$6,$8 + stq $8,-32($16) + sll $4,$7,$2 + + ldq $3,16($17) + srl $4,$19,$5 + bis $1,$2,$8 + stq $8,-24($16) + sll $3,$7,$6 + + ldq $4,24($17) + srl $3,$19,$1 + bis $5,$6,$8 + stq $8,-16($16) + sll $4,$7,$2 + + addq $17,32,$17 + bis $1,$2,$8 + stq $8,-8($16) + + bgt $18,.Roop + +.Rend: srl $4,$19,$8 + stq $8,0($16) + ret $31,($26),1 + .end _gcry_mpih_rshift + diff --git a/comm/third_party/libgcrypt/mpi/alpha/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/alpha/mpih-sub1.S new file mode 100644 index 0000000000..9a644468cd --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/mpih-sub1.S @@ -0,0 +1,124 @@ +/* Alpha sub_n -- Subtract two limb vectors of the same length > 0 and + * store difference in a third limb vector. + * Copyright (C) 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (r16) + * mpi_ptr_t s1_ptr, (r17) + * mpi_ptr_t s2_ptr, (r18) + * mpi_size_t size) (r19) + */ + + .set noreorder + .set noat +.text + .align 3 + .globl _gcry_mpih_sub_n + .ent _gcry_mpih_sub_n +_gcry_mpih_sub_n: + .frame $30,0,$26,0 + + ldq $3,0($17) + ldq $4,0($18) + + subq $19,1,$19 + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 + beq $2,.L0 # if multiple of 4 limbs, skip first loop + + subq $19,$2,$19 + +.Loop0: subq $2,1,$2 + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,0($16) + or $0,$1,$0 + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend + + .align 3 +.Loop: subq $19,4,$19 + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + subq $5,$6,$6 + cmpult $5,$6,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + subq $5,$6,$6 + cmpult $5,$6,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 + bne $19,.Loop + +.Lend: addq $4,$0,$4 + cmpult $4,$0,$1 + subq $3,$4,$4 + cmpult $3,$4,$0 + stq 
$4,0($16) + or $0,$1,$0 + ret $31,($26),1 + + .end _gcry_mpih_sub_n + + diff --git a/comm/third_party/libgcrypt/mpi/alpha/udiv-qrnnd.S b/comm/third_party/libgcrypt/mpi/alpha/udiv-qrnnd.S new file mode 100644 index 0000000000..dd0c52d7de --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/alpha/udiv-qrnnd.S @@ -0,0 +1,159 @@ +/* Alpha 21064 __udiv_qrnnd + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + + + .set noreorder + .set noat +.text + .align 3 + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 + + ldiq cnt,16 + blt d,.Largedivisor + +.Loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +.Largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +.Loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,.LOdd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +.LOdd: + /* q' in n0. 
r' in n1 */ + addq n1,n0,n1 + cmpult n1,n0,tmp # tmp := carry from addq + beq tmp,.LLp6 + addq n0,1,n0 + subq n1,d,n1 +.LLp6: cmpult n1,d,tmp + bne tmp,.LLp7 + addq n0,1,n0 + subq n1,d,n1 +.LLp7: + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd diff --git a/comm/third_party/libgcrypt/mpi/amd64/distfiles b/comm/third_party/libgcrypt/mpi/amd64/distfiles new file mode 100644 index 0000000000..44aad5f829 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/distfiles @@ -0,0 +1,9 @@ +func_abi.h +mpih-add1.S +mpih-lshift.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-rshift.S +mpih-sub1.S +mpi-asm-defs.h diff --git a/comm/third_party/libgcrypt/mpi/amd64/func_abi.h b/comm/third_party/libgcrypt/mpi/amd64/func_abi.h new file mode 100644 index 0000000000..a60363e4e4 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/func_abi.h @@ -0,0 +1,56 @@ +#include <config.h> + +#ifdef __x86_64__ +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_RESTORE(reg) .cfi_restore reg + +# define CFI_PUSH(reg) \ + CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0) +# define CFI_POP(reg) \ + CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg) +#else +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_RESTORE(reg) + +# define CFI_PUSH(reg) +# define CFI_POP(reg) +#endif +#endif + +#ifdef USE_MS_ABI + /* Store registers and move four first input arguments from MS ABI to + * SYSV ABI. */ + #define FUNC_ENTRY() \ + CFI_STARTPROC(); \ + pushq %rsi; \ + CFI_PUSH(%rsi); \ + pushq %rdi; \ + CFI_PUSH(%rdi); \ + movq %rdx, %rsi; \ + movq %rcx, %rdi; \ + movq %r8, %rdx; \ + movq %r9, %rcx; + + /* Restore registers. 
*/ + #define FUNC_EXIT() \ + popq %rdi; \ + CFI_POP(%rdi); \ + popq %rsi; \ + CFI_POP(%rsi); \ + ret; \ + CFI_ENDPROC(); +#else + #define FUNC_ENTRY() \ + CFI_STARTPROC(); + + #define FUNC_EXIT() \ + ret; \ + CFI_ENDPROC(); +#endif diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpi-asm-defs.h b/comm/third_party/libgcrypt/mpi/amd64/mpi-asm-defs.h new file mode 100644 index 0000000000..65190653bc --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpi-asm-defs.h @@ -0,0 +1,4 @@ +/* This file defines some basic constants for the MPI machinery. We + * need to define the types on a per-CPU basis, so it is done with + * this file here. */ +#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG_LONG) diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-add1.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-add1.S new file mode 100644 index 0000000000..157e5f1e0d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-add1.S @@ -0,0 +1,64 @@ +/* AMD64 (x86_64) add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, rdi + * mpi_ptr_t s1_ptr, rsi + * mpi_ptr_t s2_ptr, rdx + * mpi_size_t size) rcx + */ + +.text + .globl C_SYMBOL_NAME(_gcry_mpih_add_n) +C_SYMBOL_NAME(_gcry_mpih_add_n:) /* res_ptr[i] = s1_ptr[i] + s2_ptr[i] for i in [0, size); returns the final carry (0 or 1) in %rax */ + FUNC_ENTRY() + leaq (%rsi,%rcx,8), %rsi /* advance all three pointers one past the last limb ... */ + leaq (%rdi,%rcx,8), %rdi + leaq (%rdx,%rcx,8), %rdx + negq %rcx /* ... and index them with %rcx = -size counting up to 0 */ + xorl %eax, %eax /* clear cy */ + + ALIGN(4) /* minimal alignment for claimed speed */ +.Loop: movq (%rsi,%rcx,8), %rax + movq (%rdx,%rcx,8), %r10 + adcq %r10, %rax + movq %rax, (%rdi,%rcx,8) + incq %rcx /* incq sets ZF for the jne but leaves CF alone, so the carry reaches the next adcq */ + jne .Loop + + movq %rcx, %rax /* zero %rax */ + adcq %rax, %rax /* %rax = carry out of the most significant limb */ + FUNC_EXIT() /* NOTE(review): FUNC_EXIT() as defined in func_abi.h already ends in ret, so the ret below looks unreachable -- confirm */ + ret diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-lshift.S new file mode 100644 index 0000000000..76e9408fcd --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-lshift.S @@ -0,0 +1,79 @@ +/* AMD64 (x86_64) lshift -- Left shift a limb vector and store + * result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, rdi + * mpi_ptr_t up, rsi + * mpi_size_t usize, rdx + * unsigned cnt) rcx + */ + +.text + .globl C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift:) + FUNC_ENTRY() + /* Note: %xmm6 and %xmm7 not used for WIN64 ABI compatibility. 
*/ + movq -8(%rsi,%rdx,8), %xmm4 + movd %ecx, %xmm1 + movl $64, %eax + subl %ecx, %eax + movd %eax, %xmm0 + movdqa %xmm4, %xmm3 + psrlq %xmm0, %xmm4 + movd %xmm4, %rax + subq $2, %rdx + jl .Lendo + + ALIGN(4) /* minimal alignment for claimed speed */ +.Loop: movq (%rsi,%rdx,8), %xmm5 + movdqa %xmm5, %xmm2 + psrlq %xmm0, %xmm5 + psllq %xmm1, %xmm3 + por %xmm5, %xmm3 + movq %xmm3, 8(%rdi,%rdx,8) + je .Lende + movq -8(%rsi,%rdx,8), %xmm4 + movdqa %xmm4, %xmm3 + psrlq %xmm0, %xmm4 + psllq %xmm1, %xmm2 + por %xmm4, %xmm2 + movq %xmm2, (%rdi,%rdx,8) + subq $2, %rdx + jge .Loop + +.Lendo: movdqa %xmm3, %xmm2 +.Lende: psllq %xmm1, %xmm2 + movq %xmm2, (%rdi) + FUNC_EXIT() + ret diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-mul1.S new file mode 100644 index 0000000000..67ab47eab6 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-mul1.S @@ -0,0 +1,67 @@ +/* AMD64 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (rdi) + * mpi_ptr_t s1_ptr, (rsi) + * mpi_size_t s1_size, (rdx) + * mpi_limb_t s2_limb) (rcx) + */ + + + TEXT + ALIGN(5) + .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + + GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) +C_SYMBOL_NAME(_gcry_mpih_mul_1:) + + FUNC_ENTRY() + movq %rdx, %r11 + leaq (%rsi,%rdx,8), %rsi + leaq (%rdi,%rdx,8), %rdi + negq %r11 + xorl %r8d, %r8d + +.Loop: movq (%rsi,%r11,8), %rax + mulq %rcx + addq %r8, %rax + movl $0, %r8d + adcq %rdx, %r8 + movq %rax, (%rdi,%r11,8) + incq %r11 + jne .Loop + + movq %r8, %rax + FUNC_EXIT() + ret diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-mul2.S new file mode 100644 index 0000000000..1aa4fa0ac8 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-mul2.S @@ -0,0 +1,66 @@ +/* AMD64 addmul2 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (rdi) + * mpi_ptr_t s1_ptr, (rsi) + * mpi_size_t s1_size, (rdx) + * mpi_limb_t s2_limb) (rcx) + */ + TEXT + GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) +C_SYMBOL_NAME(_gcry_mpih_addmul_1:) /* res_ptr[i] += s1_ptr[i] * s2_limb for i in [0, s1_size); returns the final carry limb in %rax */ + FUNC_ENTRY() + movq %rdx, %r11 /* keep the size in %r11: mulq overwrites %rdx */ + leaq (%rsi,%rdx,8), %rsi + leaq (%rdi,%rdx,8), %rdi + negq %r11 /* %r11 = -s1_size, index counting up to 0 */ + xorl %r8d, %r8d /* %r8 = carry limb into the next iteration */ + xorl %r10d, %r10d /* %r10 = constant 0 */ + + ALIGN(3) /* minimal alignment for claimed speed */ +.Loop: movq (%rsi,%r11,8), %rax + mulq %rcx + addq (%rdi,%r11,8), %rax + adcq %r10, %rdx /* fold the carry of the addq into the high product limb */ + addq %r8, %rax + movq %r10, %r8 /* zero %r8 with a mov so CF from the addq is preserved */ + movq %rax, (%rdi,%r11,8) + adcq %rdx, %r8 /* next carry limb = high product limb + CF */ + incq %r11 + jne .Loop + + movq %r8, %rax + FUNC_EXIT() /* NOTE(review): FUNC_EXIT() as defined in func_abi.h already ends in ret, so the ret below looks unreachable -- confirm */ + ret diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-mul3.S new file mode 100644 index 0000000000..bc41c4eb97 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-mul3.S @@ -0,0 +1,67 @@ +/* AMD64 submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (rdi) + * mpi_ptr_t s1_ptr, (rsi) + * mpi_size_t s1_size, (rdx) + * mpi_limb_t s2_limb) (rcx) + */ + TEXT + GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) +C_SYMBOL_NAME(_gcry_mpih_submul_1:) /* res_ptr[i] -= s1_ptr[i] * s2_limb for i in [0, s1_size); returns the final borrow limb in %rax */ + FUNC_ENTRY() + movq %rdx, %r11 /* keep the size in %r11: mulq overwrites %rdx */ + leaq (%rsi,%r11,8), %rsi + leaq (%rdi,%r11,8), %rdi + negq %r11 /* %r11 = -s1_size, index counting up to 0 */ + xorl %r8d, %r8d /* %r8 = borrow limb into the next iteration */ + + ALIGN(3) /* minimal alignment for claimed speed */ +.Loop: movq (%rsi,%r11,8), %rax + movq (%rdi,%r11,8), %r10 + mulq %rcx + subq %r8, %r10 /* subtract the incoming borrow limb first */ + movl $0, %r8d /* movl rather than xorl: CF from the subq must survive */ + adcl %r8d, %r8d /* %r8 = borrow of that subtraction (0 or 1) */ + subq %rax, %r10 + adcq %rdx, %r8 /* next borrow limb = high product limb + both borrows */ + movq %r10, (%rdi,%r11,8) + incq %r11 + jne .Loop + + movq %r8, %rax + FUNC_EXIT() /* NOTE(review): FUNC_EXIT() as defined in func_abi.h already ends in ret, so the ret below looks unreachable -- confirm */ + ret diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-rshift.S new file mode 100644 index 0000000000..d5e27974ed --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-rshift.S @@ -0,0 +1,82 @@ +/* AMD64 (x86_64) rshift -- Right shift a limb vector and store + * result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, rdi + * mpi_ptr_t up, rsi + * mpi_size_t usize, rdx + * unsigned cnt) rcx + */ + +.text + .globl C_SYMBOL_NAME(_gcry_mpih_rshift) +C_SYMBOL_NAME(_gcry_mpih_rshift:) /* wp[] = up[] shifted right by cnt bits over usize limbs; returns up[0] << (64 - cnt), i.e. the bits shifted out, in %rax */ + FUNC_ENTRY() + /* Note: %xmm6 and %xmm7 not used for WIN64 ABI compatibility. */ + movq (%rsi), %xmm4 + movd %ecx, %xmm1 /* %xmm1 = cnt */ + movl $64, %eax + subl %ecx, %eax + movd %eax, %xmm0 /* %xmm0 = 64 - cnt */ + movdqa %xmm4, %xmm3 + psllq %xmm0, %xmm4 + movd %xmm4, %rax /* return value; %rax is not written again below */ + leaq (%rsi,%rdx,8), %rsi + leaq (%rdi,%rdx,8), %rdi + negq %rdx + addq $2, %rdx /* %rdx = 2 - usize; positive means a single limb, no loop */ + jg .Lendo + + ALIGN(4) /* minimal alignment for claimed speed */ +.Loop: movq -8(%rsi,%rdx,8), %xmm5 + movdqa %xmm5, %xmm2 + psllq %xmm0, %xmm5 + psrlq %xmm1, %xmm3 + por %xmm5, %xmm3 + movq %xmm3, -16(%rdi,%rdx,8) + je .Lende /* tests the flags of the last addq $2,%rdx; the SSE2 instructions in between do not modify EFLAGS */ + movq (%rsi,%rdx,8), %xmm4 + movdqa %xmm4, %xmm3 + psllq %xmm0, %xmm4 + psrlq %xmm1, %xmm2 + por %xmm4, %xmm2 + movq %xmm2, -8(%rdi,%rdx,8) + addq $2, %rdx + jle .Loop + +.Lendo: movdqa %xmm3, %xmm2 +.Lende: psrlq %xmm1, %xmm2 + movq %xmm2, -8(%rdi) + FUNC_EXIT() /* NOTE(review): FUNC_EXIT() as defined in func_abi.h already ends in ret, so the ret below looks unreachable -- confirm */ + ret diff --git a/comm/third_party/libgcrypt/mpi/amd64/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/amd64/mpih-sub1.S new file mode 100644 index 0000000000..ccf6496315 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/amd64/mpih-sub1.S @@ -0,0 +1,63 @@ +/* AMD64 (x86_64) sub_n -- Subtract two limb vectors of the same length > 0 and store + * difference in
a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002, 2006 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, rdi + * mpi_ptr_t s1_ptr, rsi + * mpi_ptr_t s2_ptr, rdx + * mpi_size_t size) rcx + */ +.text + .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) +C_SYMBOL_NAME(_gcry_mpih_sub_n:) /* res_ptr[i] = s1_ptr[i] - s2_ptr[i] for i in [0, size); returns the final borrow (0 or 1) in %rax */ + FUNC_ENTRY() + leaq (%rsi,%rcx,8), %rsi /* advance all three pointers one past the last limb ... */ + leaq (%rdi,%rcx,8), %rdi + leaq (%rdx,%rcx,8), %rdx + negq %rcx /* ... and index them with %rcx = -size counting up to 0 */ + xorl %eax, %eax /* clear cy */ + + ALIGN(4) /* minimal alignment for claimed speed */ +.Loop: movq (%rsi,%rcx,8), %rax + movq (%rdx,%rcx,8), %r10 + sbbq %r10, %rax + movq %rax, (%rdi,%rcx,8) + incq %rcx /* incq sets ZF for the jne but leaves CF alone, so the borrow reaches the next sbbq */ + jne .Loop + + movq %rcx, %rax /* zero %rax */ + adcq %rax, %rax /* %rax = borrow out of the most significant limb */ + FUNC_EXIT() /* NOTE(review): FUNC_EXIT() as defined in func_abi.h already ends in ret, so the ret below looks unreachable -- confirm */ + ret diff --git a/comm/third_party/libgcrypt/mpi/arm/distfiles b/comm/third_party/libgcrypt/mpi/arm/distfiles new file mode 100644 index 0000000000..27a2ca5272 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/arm/distfiles @@ -0,0 +1,6 @@ +mpi-asm-defs.h +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-sub1.S diff --git a/comm/third_party/libgcrypt/mpi/arm/mpi-asm-defs.h b/comm/third_party/libgcrypt/mpi/arm/mpi-asm-defs.h new file mode 100644 index 0000000000..047d1f5a72 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/arm/mpi-asm-defs.h @@ -0,0 +1,4 @@ +/* This file defines some basic constants for the MPI machinery. We + * need to define the types on a per-CPU basis, so it is done with + * this file here. */ +#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG) diff --git a/comm/third_party/libgcrypt/mpi/arm/mpih-add1.S b/comm/third_party/libgcrypt/mpi/arm/mpih-add1.S new file mode 100644 index 0000000000..09e8b3b2bb --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/arm/mpih-add1.S @@ -0,0 +1,76 @@ +/* ARM add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Note: This code is heavily based on the GNU MP Library (version 4.2.1). + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +.syntax unified +.arm + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, %r0 + * mpi_ptr_t s1_ptr, %r1 + * mpi_ptr_t s2_ptr, %r2 + * mpi_size_t size) %r3 + */ + +.text + +.globl _gcry_mpih_add_n +.type _gcry_mpih_add_n,%function +_gcry_mpih_add_n: + push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; + cmn %r0, #0; /* clear carry flag */ + + tst %r3, #3; + beq .Large_loop; + +.Loop: + ldr %r4, [%r1], #4; + sub %r3, #1; + ldr %lr, [%r2], #4; + adcs %r4, %lr; + tst %r3, #3; + str %r4, [%r0], #4; + bne .Loop; + + teq %r3, #0; + beq .Lend; + +.Large_loop: + ldm %r1!, {%r4, %r6, %r8, %r10}; + ldm %r2!, {%r5, %r7, %r9, %lr}; + sub %r3, #4; + adcs %r4, %r5; + adcs %r6, %r7; + adcs %r8, %r9; + adcs %r10, %lr; + teq %r3, #0; + stm %r0!, {%r4, %r6, %r8, %r10}; + bne .Large_loop; + +.Lend: + adc %r0, %r3, #0; + pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; +.size _gcry_mpih_add_n,.-_gcry_mpih_add_n; diff --git a/comm/third_party/libgcrypt/mpi/arm/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/arm/mpih-mul1.S new file mode 100644 index 0000000000..c2e2854bf1 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/arm/mpih-mul1.S @@ -0,0 +1,80 @@ +/* ARM mul_1 
-- Multiply a limb vector with a limb and store the result in + * a second limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Note: This code is heavily based on the GNU MP Library (version 4.2.1). + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +.syntax unified +.arm + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, %r0 + * mpi_ptr_t s1_ptr, %r1 + * mpi_size_t s1_size, %r2 + * mpi_limb_t s2_limb) %r3 + */ + +.text + +.globl _gcry_mpih_mul_1 +.type _gcry_mpih_mul_1,%function +_gcry_mpih_mul_1: + push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %lr}; + mov %r4, #0; + + tst %r2, #3; + beq .Large_loop; + +.Loop: + ldr %r5, [%r1], #4; + mov %lr, #0; + umlal %r4, %lr, %r5, %r3; + sub %r2, #1; + str %r4, [%r0], #4; + tst %r2, #3; + mov %r4, %lr; + bne .Loop; + + teq %r2, #0; + beq .Lend; + +.Large_loop: + ldm %r1!, {%r5, %r6, %r7, %r8}; + mov %r9, #0; + mov %r10, #0; + umlal %r4, %r9, %r5, %r3; + mov %r11, #0; + umlal %r9, %r10, %r6, %r3; + str %r4, [%r0], #4; + mov %r4, #0; + umlal %r10, %r11, %r7, %r3; + subs %r2, #4; + umlal %r11, %r4, %r8, %r3; + stm %r0!, {%r9, %r10, %r11}; + bne .Large_loop; + +.Lend: + mov %r0, %r4; + pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %pc}; +.size 
_gcry_mpih_mul_1,.-_gcry_mpih_mul_1; diff --git a/comm/third_party/libgcrypt/mpi/arm/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/arm/mpih-mul2.S new file mode 100644 index 0000000000..bce932e9b0 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/arm/mpih-mul2.S @@ -0,0 +1,94 @@ +/* ARM mul_2 -- Multiply a limb vector with a limb and add the result to + * a second limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Note: This code is heavily based on the GNU MP Library (version 4.2.1). 
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ *
+ * For each of the s1_size limbs: add s1 limb * s2_limb plus the pending
+ * carry to the limb already stored at res_ptr and write the low half
+ * back.  The pending carry is split between %lr (high half of the last
+ * product) and the processor carry flag; their sum is returned in %r0.
+ *
+ * s1_size == 0 is not handled: it satisfies the multiple-of-4 test and
+ * falls straight into the 4-limb loop.
+ */
+
+.text
+
+.globl _gcry_mpih_addmul_1
+.type _gcry_mpih_addmul_1,%function
+_gcry_mpih_addmul_1:
+    push {%r4, %r5, %r6, %r8, %r10, %lr};
+    mov %lr, #0; /* lr = high half carried between limbs */
+    cmn %r0, #0; /* clear carry flag */
+
+    tst %r2, #3;
+    beq .Large_loop; /* count is already a multiple of 4 */
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+    ldr %r5, [%r1], #4;
+    ldr %r4, [%r0];
+    sub %r2, #1;
+    adcs %r4, %lr; /* r4 = res limb + previous high half + carry flag */
+    mov %lr, #0;
+    umlal %r4, %lr, %r5, %r3; /* lr:r4 = r4 + r5 * s2_limb */
+    tst %r2, #3;
+    str %r4, [%r0], #4;
+    bne .Loop;
+
+    teq %r2, #0;
+    beq .Lend; /* nothing left for the 4-limb loop */
+
+.Large_loop: /* four limbs per pass, same adcs/umlal step repeated */
+    ldr %r5, [%r1], #4;
+    ldm %r0, {%r4, %r6, %r8, %r10};
+
+    sub %r2, #4;
+    adcs %r4, %lr;
+    mov %lr, #0;
+    umlal %r4, %lr, %r5, %r3;
+
+    ldr %r5, [%r1], #4;
+    adcs %r6, %lr;
+    mov %lr, #0;
+    umlal %r6, %lr, %r5, %r3;
+
+    ldr %r5, [%r1], #4;
+    adcs %r8, %lr;
+    mov %lr, #0;
+    umlal %r8, %lr, %r5, %r3;
+
+    ldr %r5, [%r1], #4;
+    adcs %r10, %lr;
+    mov %lr, #0;
+    umlal %r10, %lr, %r5, %r3;
+
+    teq %r2, #0;
+    stm %r0!, {%r4, %r6, %r8, %r10};
+    bne .Large_loop;
+
+.Lend:
+    adc %r0, %lr, #0; /* return last high half + pending carry flag */
+    pop {%r4, %r5, %r6, %r8, %r10, %pc};
+.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;
diff --git a/comm/third_party/libgcrypt/mpi/arm/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/arm/mpih-mul3.S
new file mode 100644
index 0000000000..33326c7873
--- /dev/null
+++ b/comm/third_party/libgcrypt/mpi/arm/mpih-mul3.S
@@ -0,0 +1,100 @@
+/* ARM mul_3 -- Multiply a limb vector with a limb and subtract the result
+ * from a second limb vector.
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ *
+ * For each of the s1_size limbs: subtract (s1 limb * s2_limb + previous
+ * high half) from the limb already stored at res_ptr and write the
+ * result back.  The value returned in %r0 is the last high half plus 1
+ * if the final subtraction borrowed (carry flag clear).
+ *
+ * s1_size == 0 is not handled: it satisfies the multiple-of-4 test and
+ * falls straight into the 4-limb loop.
+ */
+
+.text
+
+.globl _gcry_mpih_submul_1
+.type _gcry_mpih_submul_1,%function
+_gcry_mpih_submul_1:
+    push {%r4, %r5, %r6, %r8, %r9, %r10, %lr};
+    mov %lr, #0; /* lr = high half carried between limbs */
+    cmp %r0, #0; /* prepare carry flag for sbc */
+
+    tst %r2, #3;
+    beq .Large_loop; /* count is already a multiple of 4 */
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+    ldr %r5, [%r1], #4;
+    mov %r4, %lr;
+    mov %lr, #0;
+    ldr %r6, [%r0];
+    umlal %r4, %lr, %r5, %r3; /* lr:r4 = previous high half + r5 * s2_limb */
+    sub %r2, #1;
+    sbcs %r4, %r6, %r4; /* r4 = res limb - r4 - borrow */
+    tst %r2, #3;
+    str %r4, [%r0], #4;
+    bne .Loop;
+
+    teq %r2, #0;
+    beq .Lend; /* nothing left for the 4-limb loop */
+
+.Large_loop: /* four limbs per pass; lr and r9 alternate as low/high half */
+    ldr %r5, [%r1], #4;
+    mov %r9, #0;
+    ldr %r4, [%r0, #0];
+
+    umlal %lr, %r9, %r5, %r3; /* r9:lr = previous high half + r5 * s2_limb */
+    ldr %r6, [%r0, #4];
+    ldr %r5, [%r1], #4;
+    sbcs %r4, %r4, %lr;
+
+    mov %lr, #0;
+    umlal %r9, %lr, %r5, %r3;
+    ldr %r8, [%r0, #8];
+    ldr %r5, [%r1], #4;
+    sbcs %r6, %r6, %r9;
+
+    mov %r9, #0;
+    umlal %lr, %r9, %r5, %r3;
+    ldr %r10, [%r0, #12];
+    ldr %r5, [%r1], #4;
+    sbcs %r8, %r8, %lr;
+
+    mov %lr, #0;
+    umlal %r9, %lr, %r5, %r3;
+    sub %r2, #4;
+    sbcs %r10, %r10, %r9;
+
+    teq %r2, #0;
+    stm %r0!, {%r4, %r6, %r8, %r10};
+    bne .Large_loop;
+
+.Lend: /* r2 is 0 on every path that reaches this label */
+    it cc
+    movcc %r2, #1; /* carry clear: the last sbcs borrowed */
+    add %r0, %lr, %r2; /* return last high half + borrow */
+    pop {%r4, %r5, %r6, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;
diff --git a/comm/third_party/libgcrypt/mpi/arm/mpih-sub1.S
b/comm/third_party/libgcrypt/mpi/arm/mpih-sub1.S new file mode 100644 index 0000000000..593e3cded6 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/arm/mpih-sub1.S @@ -0,0 +1,77 @@ +/* ARM sub_n -- Subtract two limb vectors of the same length > 0 and store + * difference in a third limb vector. + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Note: This code is heavily based on the GNU MP Library (version 4.2.1). 
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_ptr_t s2_ptr, %r2
+ * mpi_size_t size) %r3
+ *
+ * Stores s1 limb - s2 limb - borrow to res_ptr for each of the size
+ * limbs, chaining the borrow through the carry flag, and returns the
+ * final borrow (0 or 1) in %r0.
+ *
+ * size == 0 is not handled: it satisfies the multiple-of-4 test and
+ * falls straight into the 4-limb loop.
+ */
+
+.text
+
+.globl _gcry_mpih_sub_n
+.type _gcry_mpih_sub_n,%function
+_gcry_mpih_sub_n:
+    push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr};
+    cmp %r0, #0; /* prepare carry flag for sub */
+
+    tst %r3, #3;
+    beq .Large_loop; /* count is already a multiple of 4 */
+
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+    ldr %r4, [%r1], #4;
+    sub %r3, #1;
+    ldr %lr, [%r2], #4;
+    sbcs %r4, %lr; /* r4 = s1 limb - s2 limb - borrow */
+    tst %r3, #3;
+    str %r4, [%r0], #4;
+    bne .Loop;
+
+    teq %r3, #0;
+    beq .Lend; /* nothing left for the 4-limb loop */
+
+.Large_loop: /* four limbs per pass */
+    ldm %r1!, {%r4, %r6, %r8, %r10};
+    sub %r3, #4;
+    ldm %r2!, {%r5, %r7, %r9, %lr};
+    sbcs %r4, %r5;
+    sbcs %r6, %r7;
+    sbcs %r8, %r9;
+    sbcs %r10, %lr;
+    teq %r3, #0;
+    stm %r0!, {%r4, %r6, %r8, %r10};
+    bne .Large_loop;
+
+.Lend: /* r3 is 0 on every path that reaches this label */
+    sbc %r0, %r3, #0; /* r0 = 0 on no borrow, -1 on borrow */
+    neg %r0, %r0; /* return the borrow as 0 or 1 */
+    pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;
diff --git a/comm/third_party/libgcrypt/mpi/asm-common-aarch64.h b/comm/third_party/libgcrypt/mpi/asm-common-aarch64.h
new file mode 100644
index 0000000000..cf4bdb8529
--- /dev/null
+++ b/comm/third_party/libgcrypt/mpi/asm-common-aarch64.h
@@ -0,0 +1,26 @@
+/* asm-common-aarch64.h - Common macros for AArch64 assembly
+ *
+ * Copyright (C) 2018 Martin Storsjö <martin@martin.st>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPI_ASM_COMMON_AARCH64_H +#define MPI_ASM_COMMON_AARCH64_H + +#include "../cipher/asm-common-aarch64.h" + +#endif /* MPI_ASM_COMMON_AARCH64_H */ diff --git a/comm/third_party/libgcrypt/mpi/config.links b/comm/third_party/libgcrypt/mpi/config.links new file mode 100644 index 0000000000..e4fc4fc4fd --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/config.links @@ -0,0 +1,470 @@ +# config.links - helper for ../configure -*- mode: sh -*- +# Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. +# Copyright (C) 2012 g10 Code GmbH +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA +# +# sourced by ../configure to get the list of files to link +# this should set $mpi_ln_list. +# Note: this is called from the above directory. +# +# Required variables: +# $ac_cv_sys_symbol_underscore +# $gcry_cv_gcc_arm_platform_as_ok + +mpi_sflags= +mpi_extra_modules= +mpi_cpu_arch= + +test -d ./mpi || mkdir ./mpi + +# We grep the list of modules from the Makefile so that +# we don't need to maintain them here. 
+mpi_standard_modules=`$AWK '/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ { + if( $3 != "O" ) print $2 }' $srcdir/mpi/Makefile.am` +mpi_optional_modules=`$AWK '/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ { + if( $3 == "O" ) print $2 }' $srcdir/mpi/Makefile.am` + + +echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h +echo "/* Host: ${host} */" >>./mpi/asm-syntax.h + +case "${host}" in + i[34567]86*-*-openbsd[12]* | \ + i[34567]86*-*-openbsd3.[0123]*) + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="x86" + ;; + i[3467]86*-*-openbsd* | \ + i[3467]86*-*-freebsd*-elf | \ + i[3467]86*-*-freebsd[3-9]* | \ + i[3467]86*-*-freebsd[12][0-9]*| \ + i[3467]86*-*-freebsdelf* | \ + i[3467]86*-*-netbsd* | \ + i[3467]86*-*-k*bsd*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i386" + mpi_cpu_arch="x86" + ;; + i586*-*-openbsd* | \ + i586*-*-freebsd*-elf | \ + i586*-*-freebsd[3-9]* | \ + i586*-*-freebsd[12][0-9]*| \ + i586*-*-freebsdelf* | \ + i586*-*-netbsd* | \ + i586*-*-k*bsd* | \ + pentium-*-netbsd* | \ + pentiumpro-*-netbsd*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i586 i386" + mpi_cpu_arch="x86" + ;; + i[34]86*-*-bsdi4*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i386" + mpi_cpu_arch="x86" + ;; + i[3467]86*-*-linuxaout* | \ + i[3467]86*-*-linuxoldld* | \ + i[3467]86*-*-*bsd*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i386" + mpi_cpu_arch="x86" + ;; + i586*-*-linuxaout* | \ + i586*-*-linuxoldld* | \ + i586*-*-*bsd*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i586 i386" + 
mpi_cpu_arch="x86" + ;; + i[3467]86*-msdosdjgpp* | \ + i[34]86*-apple-darwin*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i386" + mpi_cpu_arch="x86" + ;; + i586*-msdosdjgpp* | \ + i[567]86*-apple-darwin*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i586 i386" + mpi_cpu_arch="x86" + ;; + i[3467]86*-*-*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i386" + mpi_cpu_arch="x86" + ;; + i586*-*-* | \ + pentium-*-* | \ + pentiumpro-*-*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + path="i586 i386" + mpi_cpu_arch="x86" + ;; + x86_64-apple-darwin*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + cat $srcdir/mpi/amd64/func_abi.h >>./mpi/asm-syntax.h + path="amd64" + mpi_cpu_arch="x86" + ;; + x86_64-*mingw32*) + echo '#define USE_MS_ABI' >>./mpi/asm-syntax.h + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + cat $srcdir/mpi/amd64/func_abi.h >>./mpi/asm-syntax.h + path="amd64" + mpi_cpu_arch="x86" + ;; + x86_64-*-*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h + cat $srcdir/mpi/amd64/func_abi.h >>./mpi/asm-syntax.h + path="amd64" + mpi_cpu_arch="x86" + ;; + alpha*-*-*) + echo '/* configured for alpha */' >>./mpi/asm-syntax.h + path="alpha" + mpi_extra_modules="udiv-qrnnd" + mpi_cpu_arch="alpha" + ;; + aarch64-*-*) + echo '/* configured for aarch64 */' >>./mpi/asm-syntax.h + path="aarch64" + mpi_cpu_arch="aarch64" + ;; + arm*-*-*) + mpi_cpu_arch="arm" + if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then + echo '/* configured for arm */' >>./mpi/asm-syntax.h + path="arm" + else + echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h + path="" + fi 
+ ;; + hppa7000*-*-*) + echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h + path="hppa1.1 hppa" + mpi_extra_modules="udiv-qrnnd" + mpi_cpu_arch="hppa" + ;; + hppa1.0*-*-*) + echo '/* configured for HPPA 1.0 */' >>./mpi/asm-syntax.h + path="hppa" + mpi_extra_modules="udiv-qrnnd" + mpi_cpu_arch="hppa" + ;; + hppa*-*-*) # assume pa7100 + echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h + path="pa7100 hppa1.1 hppa" + mpi_extra_modules="udiv-qrnnd" + mpi_cpu_arch="hppa" + ;; + sparc64-*-linux-gnu) + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="sparc" + ;; + sparc64-sun-solaris2*) + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="sparc" + ;; + sparc64-*-netbsd* | sparc64-*-freebsd* | sparc64-*-openbsd*) + # There are no sparc64 assembler modules that work on the + # *BSDs, so use the generic C functions. + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="sparc" + ;; + sparc64*-*-*) + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="sparc" + ;; + sparc9*-*-* | \ + ultrasparc*-*-* ) + echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h + path="sparc32v8 sparc32" + mpi_cpu_arch="sparc" + ;; + sparc8*-*-* | \ + microsparc*-*-*) + echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h + path="sparc32v8 sparc32" + mpi_cpu_arch="sparc" + ;; + supersparc*-*-*) + echo '/* configured for supersparc */' >>./mpi/asm-syntax.h + path="supersparc sparc32v8 sparc32" + mpi_extra_modules="udiv" + mpi_cpu_arch="sparc" + ;; + sparc*-*-*) + echo '/* configured for sparc */' >>./mpi/asm-syntax.h + path="sparc32" + mpi_extra_modules="udiv" + mpi_cpu_arch="sparc" + ;; + mips[34]*-*-* | \ + mips*-*-irix6*) + echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h + path="mips3" + mpi_cpu_arch="mips" + ;; + mips*-*-*) + echo '/* configured for MIPS2 */' 
>>./mpi/asm-syntax.h + path="mips2" + mpi_cpu_arch="mips" + ;; + s390x*-*-*) + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="s390x" + ;; + + # Motorola 68k configurations. Let m68k mean 68020-68040. + # mc68000 or mc68060 configurations need to be specified explicitly + m680[234]0*-*-linuxaout* | \ + m68k*-*-linuxaout*) + echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + path="m68k/mc68020 m68k" + mpi_cpu_arch="m68k" + ;; + m68060*-*-linuxaout*) + echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + path="m68k" + mpi_cpu_arch="m68k" + ;; + m680[234]0*-*-linux* | \ + m68k*-*-linux*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + mpi_cpu_arch="m68k" + ;; + m68060*-*-linux*) + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + path="m68k" + mpi_cpu_arch="m68k" + ;; + m68k-atari-mint) + echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + path="m68k" + mpi_cpu_arch="m68k" + ;; + m68000*-*-* | \ + m68060*-*-*) + echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + path="m68k/mc68000" + mpi_cpu_arch="m68k" + ;; + m680[234]0*-*-* | \ + m68k*-*-*) + echo '#define MIT_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/m68k/syntax.h >>./mpi/asm-syntax.h + path="m68k/mc68020 m68k" + mpi_cpu_arch="m68k" + ;; + + powerpc-apple-darwin*) + echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h + path="" + mpi_cpu_arch="ppc" + ;; + + powerpc*-*-netbsd* | powerpc*-*-openbsd*) + echo '/* configured {Open,Net}BSD on powerpc */' >>./mpi/asm-syntax.h + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h + mpi_sflags="-Wa,-mppc" + path="powerpc32" + mpi_cpu_arch="ppc" 
+ ;; + + ppc620-*-* | \ + powerpc64*-*-*) + mpi_sflags="-Wa,-mppc" + path="powerpc64" + mpi_cpu_arch="ppc" + ;; + powerpc*-*-linux*) + echo '/* configured for powerpc/ELF */' >>./mpi/asm-syntax.h + echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h + cat $srcdir/mpi/powerpc32/syntax.h >>./mpi/asm-syntax.h + path="powerpc32" + mpi_cpu_arch="ppc" + ;; + + rs6000-*-aix[456789]* | \ + rs6000-*-aix3.2.[456789]) + mpi_sflags="-Wa,-mpwr" + path="power" + mpi_extra_modules="udiv-w-sdiv" + mpi_cpu_arch="ppc" + ;; + rs6000-*-* | \ + power-*-* | \ + power2-*-*) + mpi_sflags="-Wa,-mppc" + path="power" + mpi_extra_modules="udiv-w-sdiv" + mpi_cpu_arch="ppc" + ;; + powerpc-ibm-aix4.2.* ) + # I am not sure about this one but a machine identified by + # powerpc-ibm-aix4.2.1.0 cannot use the powerpc32 code. + mpi_sflags="-Wa,-mpwr" + path="power" + mpi_extra_modules="udiv-w-sdiv" + mpi_cpu_arch="ppc" + ;; + ppc601-*-*) + mpi_sflags="-Wa,-mppc" + path="power powerpc32" + mpi_cpu_arch="ppc" + ;; + ppc60[234]*-*-*) + mpi_sflags="-Wa,-mppc" + path="powerpc32" + mpi_cpu_arch="ppc" + ;; + powerpc*-*-*) + mpi_sflags="-Wa,-mppc" + path="powerpc32" + mpi_cpu_arch="ppc" + ;; + *) + echo '/* Platform not known */' >>./mpi/asm-syntax.h + path="" + ;; +esac + +# If asm modules are disabled reset the found variables but keep +# mpi_cpu_arch. +if test "$try_asm_modules" != "yes" ; then + echo '/* Assembler modules disabled on request */' >./mpi/asm-syntax.h + path="" + mpi_sflags="" + mpi_extra_modules="" + mpi_cpu_arch="disabled" +fi + +# Make sure that mpi_cpu_arch is not the empty string. +if test x"$mpi_cpu_arch" = x ; then + mpi_cpu_arch="unknown" +fi + +# Add .note.gnu.property section for Intel CET in assembler sources +# when CET is enabled. 
*/ +if test x"$mpi_cpu_arch" = xx86 ; then + cat <<EOF >> ./mpi/asm-syntax.h + +#if defined(__ASSEMBLER__) && defined(__CET__) +# include <cet.h> +#endif +EOF +fi + +# Make sysdep.h +echo '/* created by config.links - do not edit */' >./mpi/sysdep.h +if test x$ac_cv_sys_symbol_underscore = xyes; then + cat <<EOF >>./mpi/sysdep.h +#if __STDC__ +#define C_SYMBOL_NAME(name) _##name +#else +#define C_SYMBOL_NAME(name) _/**/name +#endif +EOF +else + cat <<EOF >>./mpi/sysdep.h +#define C_SYMBOL_NAME(name) name +EOF +fi + + +# Figure the required modules out +mpi_required_modules=$mpi_standard_modules +if test "$mpi_extra_modules" != ""; then + for fn in $mpi_extra_modules; do + for i in $mpi_optional_modules; do + if test "$fn" = "$i" ; then + mpi_required_modules="$mpi_required_modules $fn" + fi + done + done +fi + +# Try to get file to link from the assembler subdirectory and +# if this fails get it from the generic subdirectory. +mpi_ln_list= +mpi_mod_list= +path=`echo "$mpi_extra_path $path generic" | tr ':' ' '` +echo '/* Created by config.links - do not edit */' >./mpi/mod-source-info.h +echo "/* Host: ${host} */" >>./mpi/mod-source-info.h +echo "static char mod_source_info[] =" >>./mpi/mod-source-info.h +for fn in $mpi_required_modules ; do + fnu=`echo $fn | sed 's/-/_/g'` + eval mpi_mod_c_${fnu}=no + eval mpi_mod_asm_${fnu}=no + for dir in $path ; do + rm -f $srcdir/mpi/$fn.[Sc] + if test -f $srcdir/mpi/$dir/$fn.S ; then + echo " \":$dir/$fn.S\"" >>./mpi/mod-source-info.h + mpi_ln_list="$mpi_ln_list mpi/$fn-asm.S:mpi/$dir/$fn.S" + eval mpi_mod_asm_${fnu}=yes + mpi_mod_list="$mpi_mod_list $fn" + break; + elif test -f $srcdir/mpi/$dir/$fn.c ; then + echo " \":$dir/$fn.c\"" >>./mpi/mod-source-info.h + mpi_ln_list="$mpi_ln_list mpi/$fn.c:mpi/$dir/$fn.c" + eval mpi_mod_c_${fnu}=yes + mpi_mod_list="$mpi_mod_list $fn" + break; + fi + done +done +echo " ;" >>./mpi/mod-source-info.h + +# Same thing for the file which defines the limb size +path=`echo "$path generic" | tr 
':' ' '` +for dir in $path ; do + rm -f $srcdir/mpi/mpi-asm-defs.h + if test -f $srcdir/mpi/$dir/mpi-asm-defs.h ; then + mpi_ln_list="$mpi_ln_list mpi/mpi-asm-defs.h:mpi/$dir/mpi-asm-defs.h" + break; + fi +done diff --git a/comm/third_party/libgcrypt/mpi/ec-ed25519.c b/comm/third_party/libgcrypt/mpi/ec-ed25519.c new file mode 100644 index 0000000000..acfe2a69f5 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/ec-ed25519.c @@ -0,0 +1,37 @@ +/* ec-ed25519.c - Ed25519 optimized elliptic curve functions + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +#include "mpi-internal.h" +#include "longlong.h" +#include "g10lib.h" +#include "context.h" +#include "ec-context.h" + + +void +_gcry_mpi_ec_ed25519_mod (gcry_mpi_t a) +{ + (void)a; + +} diff --git a/comm/third_party/libgcrypt/mpi/ec-internal.h b/comm/third_party/libgcrypt/mpi/ec-internal.h new file mode 100644 index 0000000000..759335aad0 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/ec-internal.h @@ -0,0 +1,25 @@ +/* ec-internal.h - Internal declarations of ec*.c + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GCRY_EC_INTERNAL_H +#define GCRY_EC_INTERNAL_H + +void _gcry_mpi_ec_ed25519_mod (gcry_mpi_t a); + +#endif /*GCRY_EC_INTERNAL_H*/ diff --git a/comm/third_party/libgcrypt/mpi/ec.c b/comm/third_party/libgcrypt/mpi/ec.c new file mode 100644 index 0000000000..659bb5caf1 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/ec.c @@ -0,0 +1,2062 @@ +/* ec.c - Elliptic Curve functions + * Copyright (C) 2007 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +#include "mpi-internal.h" +#include "longlong.h" +#include "g10lib.h" +#include "context.h" +#include "ec-context.h" +#include "ec-internal.h" + +extern void reverse_buffer (unsigned char *buffer, unsigned int length); + +#define point_init(a) _gcry_mpi_point_init ((a)) +#define point_free(a) _gcry_mpi_point_free_parts ((a)) + + +/* Print a point using the log functions. If CTX is not NULL affine + coordinates will be printed. */ +void +_gcry_mpi_point_log (const char *name, mpi_point_t point, mpi_ec_t ctx) +{ + gcry_mpi_t x, y; + char buf[100]; + + if (!point) + { + snprintf (buf, sizeof buf - 1, "%s.*", name); + log_mpidump (buf, NULL); + return; + } + snprintf (buf, sizeof buf - 1, "%s.X", name); + + if (ctx) + { + x = mpi_new (0); + y = mpi_new (0); + } + if (!ctx || _gcry_mpi_ec_get_affine (x, y, point, ctx)) + { + log_mpidump (buf, point->x); + buf[strlen(buf)-1] = 'Y'; + log_mpidump (buf, point->y); + buf[strlen(buf)-1] = 'Z'; + log_mpidump (buf, point->z); + } + else + { + buf[strlen(buf)-1] = 'x'; + log_mpidump (buf, x); + buf[strlen(buf)-1] = 'y'; + log_mpidump (buf, y); + + } + if (ctx) + { + _gcry_mpi_release (x); + _gcry_mpi_release (y); + } +} + + +/* Create a new point option. NBITS gives the size in bits of one + coordinate; it is only used to pre-allocate some resources and + might also be passed as 0 to use a default value. */ +mpi_point_t +_gcry_mpi_point_new (unsigned int nbits) +{ + mpi_point_t p; + + (void)nbits; /* Currently not used. */ + + p = xmalloc (sizeof *p); + _gcry_mpi_point_init (p); + return p; +} + + +/* Release the point object P. P may be NULL. */ +void +_gcry_mpi_point_release (mpi_point_t p) +{ + if (p) + { + _gcry_mpi_point_free_parts (p); + xfree (p); + } +} + + +/* Initialize the fields of a point object. gcry_mpi_point_free_parts + may be used to release the fields. 
*/ +void +_gcry_mpi_point_init (mpi_point_t p) +{ + p->x = mpi_new (0); + p->y = mpi_new (0); + p->z = mpi_new (0); +} + + +/* Release the parts of a point object. */ +void +_gcry_mpi_point_free_parts (mpi_point_t p) +{ + mpi_free (p->x); p->x = NULL; + mpi_free (p->y); p->y = NULL; + mpi_free (p->z); p->z = NULL; +} + + +/* Set the value from S into D. */ +static void +point_set (mpi_point_t d, mpi_point_t s) +{ + mpi_set (d->x, s->x); + mpi_set (d->y, s->y); + mpi_set (d->z, s->z); +} + + +/* Return a copy of POINT. */ +gcry_mpi_point_t +_gcry_mpi_point_copy (gcry_mpi_point_t point) +{ + mpi_point_t newpoint; + + newpoint = _gcry_mpi_point_new (0); + if (point) + point_set (newpoint, point); + + return newpoint; +} + + +static void +point_resize (mpi_point_t p, mpi_ec_t ctx) +{ + size_t nlimbs = ctx->p->nlimbs; + + mpi_resize (p->x, nlimbs); + p->x->nlimbs = nlimbs; + mpi_resize (p->z, nlimbs); + p->z->nlimbs = nlimbs; + + if (ctx->model != MPI_EC_MONTGOMERY) + { + mpi_resize (p->y, nlimbs); + p->y->nlimbs = nlimbs; + } +} + + +static void +point_swap_cond (mpi_point_t d, mpi_point_t s, unsigned long swap, + mpi_ec_t ctx) +{ + mpi_swap_cond (d->x, s->x, swap); + if (ctx->model != MPI_EC_MONTGOMERY) + mpi_swap_cond (d->y, s->y, swap); + mpi_swap_cond (d->z, s->z, swap); +} + + +/* Set the projective coordinates from POINT into X, Y, and Z. If a + coordinate is not required, X, Y, or Z may be passed as NULL. */ +void +_gcry_mpi_point_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z, + mpi_point_t point) +{ + if (x) + mpi_set (x, point->x); + if (y) + mpi_set (y, point->y); + if (z) + mpi_set (z, point->z); +} + + +/* Set the projective coordinates from POINT into X, Y, and Z and + release POINT. If a coordinate is not required, X, Y, or Z may be + passed as NULL. 
*/ +void +_gcry_mpi_point_snatch_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z, + mpi_point_t point) +{ + mpi_snatch (x, point->x); + mpi_snatch (y, point->y); + mpi_snatch (z, point->z); + xfree (point); +} + + +/* Set the projective coordinates from X, Y, and Z into POINT. If a + coordinate is given as NULL, the value 0 is stored into point. If + POINT is given as NULL a new point object is allocated. Returns + POINT or the newly allocated point object. */ +mpi_point_t +_gcry_mpi_point_set (mpi_point_t point, + gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z) +{ + if (!point) + point = mpi_point_new (0); + + if (x) + mpi_set (point->x, x); + else + mpi_clear (point->x); + if (y) + mpi_set (point->y, y); + else + mpi_clear (point->y); + if (z) + mpi_set (point->z, z); + else + mpi_clear (point->z); + + return point; +} + + +/* Set the projective coordinates from X, Y, and Z into POINT. If a + coordinate is given as NULL, the value 0 is stored into point. If + POINT is given as NULL a new point object is allocated. The + coordinates X, Y, and Z are released. Returns POINT or the newly + allocated point object. */ +mpi_point_t +_gcry_mpi_point_snatch_set (mpi_point_t point, + gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z) +{ + if (!point) + point = mpi_point_new (0); + + if (x) + mpi_snatch (point->x, x); + else + mpi_clear (point->x); + if (y) + mpi_snatch (point->y, y); + else + mpi_clear (point->y); + if (z) + mpi_snatch (point->z, z); + else + mpi_clear (point->z); + + return point; +} + + +/* W = W mod P. 
*/ +static void +ec_mod (gcry_mpi_t w, mpi_ec_t ec) +{ + if (0 && ec->dialect == ECC_DIALECT_ED25519) + _gcry_mpi_ec_ed25519_mod (w); + else if (ec->t.p_barrett) + _gcry_mpi_mod_barrett (w, w, ec->t.p_barrett); + else + _gcry_mpi_mod (w, w, ec->p); +} + +static void +ec_addm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx) +{ + mpi_add (w, u, v); + ec_mod (w, ctx); +} + +static void +ec_subm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ec) +{ + mpi_sub (w, u, v); + while (w->sign) + mpi_add (w, w, ec->p); + /*ec_mod (w, ec);*/ +} + +static void +ec_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx) +{ + mpi_mul (w, u, v); + ec_mod (w, ctx); +} + +/* W = 2 * U mod P. */ +static void +ec_mul2 (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx) +{ + mpi_lshift (w, u, 1); + ec_mod (w, ctx); +} + +static void +ec_powm (gcry_mpi_t w, const gcry_mpi_t b, const gcry_mpi_t e, + mpi_ec_t ctx) +{ + mpi_powm (w, b, e, ctx->p); + /* _gcry_mpi_abs (w); */ +} + + +/* Shortcut for + ec_powm (B, B, mpi_const (MPI_C_TWO), ctx); + for easier optimization. */ +static void +ec_pow2 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx) +{ + /* Using mpi_mul is slightly faster (at least on amd64). */ + /* mpi_powm (w, b, mpi_const (MPI_C_TWO), ctx->p); */ + ec_mulm (w, b, b, ctx); +} + + +/* Shortcut for + ec_powm (B, B, mpi_const (MPI_C_THREE), ctx); + for easier optimization. */ +static void +ec_pow3 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx) +{ + mpi_powm (w, b, mpi_const (MPI_C_THREE), ctx->p); +} + + +static void +ec_invm (gcry_mpi_t x, gcry_mpi_t a, mpi_ec_t ctx) +{ + if (!mpi_invm (x, a, ctx->p)) + { + log_error ("ec_invm: inverse does not exist:\n"); + log_mpidump (" a", a); + log_mpidump (" p", ctx->p); + } +} + +/* Routines for 2^255 - 19. 
*/

/* Number of limbs needed to hold a 256 bit value.  */
#define LIMB_SIZE_25519 ((256+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB)

/* W = U + V for the field 2^255 - 19.  All three MPIs must already
   have exactly LIMB_SIZE_25519 limbs; otherwise log_bug is called.
   The computation works directly on the limb arrays.  */
static void
ec_addm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
  mpi_ptr_t wp, up, vp;
  mpi_size_t wsize = LIMB_SIZE_25519;
  mpi_limb_t n[LIMB_SIZE_25519];
  mpi_limb_t borrow;

  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
    log_bug ("addm_25519: different sizes\n");

  memset (n, 0, sizeof n);
  up = u->d;
  vp = v->d;
  wp = w->d;

  /* Add, subtract P unconditionally, then add back either P or zero
     depending on the borrow.  NOTE(review): mpih_set_cond presumably
     copies P into N only when its last argument is non-zero and does
     so without branching - confirm against its definition.  */
  _gcry_mpih_add_n (wp, up, vp, wsize);
  borrow = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
  mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
  _gcry_mpih_add_n (wp, wp, n, wsize);
  /* Clear bit 255 of the result.  */
  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
}

/* W = U - V for the field 2^255 - 19.  Same size requirements as
   ec_addm_25519.  */
static void
ec_subm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
  mpi_ptr_t wp, up, vp;
  mpi_size_t wsize = LIMB_SIZE_25519;
  mpi_limb_t n[LIMB_SIZE_25519];
  mpi_limb_t borrow;

  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
    log_bug ("subm_25519: different sizes\n");

  memset (n, 0, sizeof n);
  up = u->d;
  vp = v->d;
  wp = w->d;

  /* Subtract, then add back P (or zero) depending on the borrow.  */
  borrow = _gcry_mpih_sub_n (wp, up, vp, wsize);
  mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
  _gcry_mpih_add_n (wp, wp, n, wsize);
  /* Clear bit 255 of the result.  */
  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
}

/* W = U * V for the field 2^255 - 19.  Same size requirements as
   ec_addm_25519.  The double-width product is split at bit 255 and
   the high part is folded back multiplied by 19.  */
static void
ec_mulm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
  mpi_ptr_t wp, up, vp;
  mpi_size_t wsize = LIMB_SIZE_25519;
  mpi_limb_t n[LIMB_SIZE_25519*2];
  mpi_limb_t m[LIMB_SIZE_25519+1];
  mpi_limb_t cy;
  int msb;

  /* NOTE(review): CTX is marked unused here although CTX->P is read
     further down in this function.  */
  (void)ctx;
  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
    log_bug ("mulm_25519: different sizes\n");

  up = u->d;
  vp = v->d;
  wp = w->d;

  /* N = U * V; W takes the low 255 bits of the product.  */
  _gcry_mpih_mul_n (n, up, vp, wsize);
  memcpy (wp, n, wsize * BYTES_PER_MPI_LIMB);
  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));

  /* M = product shifted right by 255 bits, i.e. the high part.  */
  memcpy (m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB);
  _gcry_mpih_rshift (m, m, LIMB_SIZE_25519+1, (255 % BITS_PER_MPI_LIMB));

  /* M = 19 * high part, built as (high << 4) + 3 * high.  The extra
     limb M[LIMB_SIZE_25519] collects the carries.  */
  memcpy (n, m, wsize * BYTES_PER_MPI_LIMB);
  cy = _gcry_mpih_lshift (m, m, LIMB_SIZE_25519, 4);
  m[LIMB_SIZE_25519] = cy;
  cy = _gcry_mpih_add_n (m, m, n, wsize);
  m[LIMB_SIZE_25519] += cy;
  cy = _gcry_mpih_add_n (m, m, n, wsize);
  m[LIMB_SIZE_25519] += cy;
  cy = _gcry_mpih_add_n (m, m, n, wsize);
  m[LIMB_SIZE_25519] += cy;

  /* W = low part + 19 * high part.  */
  cy = _gcry_mpih_add_n (wp, wp, m, wsize);
  m[LIMB_SIZE_25519] += cy;

  /* Fold what overflowed beyond bit 254 (bit 255 of W plus the
     accumulated carries) back in, again multiplied by 19.  */
  memset (m, 0, wsize * BYTES_PER_MPI_LIMB);
  msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB));
  m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19;
  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
  _gcry_mpih_add_n (wp, wp, m, wsize);

  /* Final step: subtract P and add it back (or zero) depending on
     the borrow.  */
  m[0] = 0;
  cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
  mpih_set_cond (m, ctx->p->d, wsize, (cy != 0UL));
  _gcry_mpih_add_n (wp, wp, m, wsize);
}

/* W = 2 * U for the field 2^255 - 19, implemented as U + U.  */
static void
ec_mul2_25519 (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx)
{
  ec_addm_25519 (w, u, u, ctx);
}

/* W = B^2 for the field 2^255 - 19, implemented as B * B.  */
static void
ec_pow2_25519 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx)
{
  ec_mulm_25519 (w, b, b, ctx);
}

/* Routines for 2^448 - 2^224 - 1.
*/

/* Number of limbs needed to hold a 448 bit value, and the number of
   limbs needed for one half of such a value (rounded up).  */
#define LIMB_SIZE_448 ((448+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB)
#define LIMB_SIZE_HALF_448 ((LIMB_SIZE_448+1)/2)

/* W = U + V for the field 2^448 - 2^224 - 1.  All three MPIs must
   already have exactly LIMB_SIZE_448 limbs; otherwise log_bug is
   called.  */
static void
ec_addm_448 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
  mpi_ptr_t wp, up, vp;
  mpi_size_t wsize = LIMB_SIZE_448;
  mpi_limb_t n[LIMB_SIZE_448];
  mpi_limb_t cy;

  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
    log_bug ("addm_448: different sizes\n");

  memset (n, 0, sizeof n);
  up = u->d;
  vp = v->d;
  wp = w->d;

  /* Add, then subtract P (or zero) depending on the carry out of the
     addition.  NOTE(review): mpih_set_cond presumably copies P into
     N only when its last argument is non-zero - confirm.  */
  cy = _gcry_mpih_add_n (wp, up, vp, wsize);
  mpih_set_cond (n, ctx->p->d, wsize, (cy != 0UL));
  _gcry_mpih_sub_n (wp, wp, n, wsize);
}

/* W = U - V for the field 2^448 - 2^224 - 1.  Same size requirements
   as ec_addm_448.  */
static void
ec_subm_448 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
  mpi_ptr_t wp, up, vp;
  mpi_size_t wsize = LIMB_SIZE_448;
  mpi_limb_t n[LIMB_SIZE_448];
  mpi_limb_t borrow;

  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
    log_bug ("subm_448: different sizes\n");

  memset (n, 0, sizeof n);
  up = u->d;
  vp = v->d;
  wp = w->d;

  /* Subtract, then add back P (or zero) depending on the borrow.  */
  borrow = _gcry_mpih_sub_n (wp, up, vp, wsize);
  mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
  _gcry_mpih_add_n (wp, wp, n, wsize);
}

/* W = U * V for the field 2^448 - 2^224 - 1.  Same size requirements
   as ec_addm_448.  The double-width product is cut into four
   quarters (B0, B1 from the low half, A2, A3 from the high half)
   which are then recombined.  The "#if" sections handle limb sizes
   for which a quarter does not end on a limb boundary, so that
   32 bit shifts are needed to align the pieces.  */
static void
ec_mulm_448 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
{
  mpi_ptr_t wp, up, vp;
  mpi_size_t wsize = LIMB_SIZE_448;
  mpi_limb_t n[LIMB_SIZE_448*2];
  mpi_limb_t a2[LIMB_SIZE_HALF_448];
  mpi_limb_t a3[LIMB_SIZE_HALF_448];
  mpi_limb_t b0[LIMB_SIZE_HALF_448];
  mpi_limb_t b1[LIMB_SIZE_HALF_448];
  mpi_limb_t cy;
  int i;
#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  mpi_limb_t b1_rest, a3_rest;
#endif

  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
    log_bug ("mulm_448: different sizes\n");

  up = u->d;
  vp = v->d;
  wp = w->d;

  /* N = U * V (double width).  */
  _gcry_mpih_mul_n (n, up, vp, wsize);

  /* Copy out the four quarters of the product.  */
  for (i = 0; i < (wsize + 1)/ 2; i++)
    {
      b0[i] = n[i];
      b1[i] = n[i+wsize/2];
      a2[i] = n[i+wsize];
      a3[i] = n[i+wsize+wsize/2];
    }

#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  /* Keep only the low 32 bits of the top limb of B0 and A2 ...  */
  b0[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL<<32)-1;
  a2[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL<<32)-1;

  b1_rest = 0;
  a3_rest = 0;

  /* ... and shift B1 and A3 right by 32 bits, top limb first.  */
  for (i = (wsize + 1)/ 2 -1; i >= 0; i--)
    {
      mpi_limb_t b1v, a3v;
      b1v = b1[i];
      a3v = a3[i];
      b1[i] = (b1_rest<<32) | (b1v >> 32);
      a3[i] = (a3_rest<<32) | (a3v >> 32);
      b1_rest = b1v & (((mpi_limb_t)1UL <<32)-1);
      a3_rest = a3v & (((mpi_limb_t)1UL <<32)-1);
    }
#endif

  /* Low half of the result: B0 + A2 + A3.  */
  cy = _gcry_mpih_add_n (b0, b0, a2, LIMB_SIZE_HALF_448);
  cy += _gcry_mpih_add_n (b0, b0, a3, LIMB_SIZE_HALF_448);
  for (i = 0; i < (wsize + 1)/ 2; i++)
    wp[i] = b0[i];
#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  wp[LIMB_SIZE_HALF_448-1] &= (((mpi_limb_t)1UL <<32)-1);
#endif

#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  /* Here the carry of the low half sits in the upper 32 bits of the
     top limb of B0.  */
  cy = b0[LIMB_SIZE_HALF_448-1] >> 32;
#endif

  /* High half of the result: B1 + carry + A2 + 2 * A3.  */
  cy = _gcry_mpih_add_1 (b1, b1, LIMB_SIZE_HALF_448, cy);
  cy += _gcry_mpih_add_n (b1, b1, a2, LIMB_SIZE_HALF_448);
  cy += _gcry_mpih_add_n (b1, b1, a3, LIMB_SIZE_HALF_448);
  cy += _gcry_mpih_add_n (b1, b1, a3, LIMB_SIZE_HALF_448);
#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  /* Shift B1 right by 32 bits; the 32 bits shifted out are stored
     in the upper half of the limb shared with the low half.  */
  b1_rest = 0;
  for (i = (wsize + 1)/ 2 -1; i >= 0; i--)
    {
      mpi_limb_t b1v = b1[i];
      b1[i] = (b1_rest<<32) | (b1v >> 32);
      b1_rest = b1v & (((mpi_limb_t)1UL <<32)-1);
    }
  wp[LIMB_SIZE_HALF_448-1] |= (b1_rest << 32);
#endif
  for (i = 0; i < wsize / 2; i++)
    wp[i+(wsize + 1) / 2] = b1[i];

#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  cy = b1[LIMB_SIZE_HALF_448-1];
#endif

  /* Fold the remaining carry back in: it is added once at bit 0 and
     once at the start of the high half.  */
  memset (n, 0, wsize * BYTES_PER_MPI_LIMB);

#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
  n[LIMB_SIZE_HALF_448-1] = cy << 32;
#else
  n[LIMB_SIZE_HALF_448] = cy;
#endif
  n[0] = cy;
  _gcry_mpih_add_n (wp, wp, n, wsize);

  /* Final step: subtract P and add it back (or zero) depending on
     the borrow.  */
  memset (n, 0, wsize * BYTES_PER_MPI_LIMB);
  cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
  mpih_set_cond (n, ctx->p->d, wsize, (cy != 0UL));
  _gcry_mpih_add_n (wp, wp, n, wsize);
}

/* W = 2 * U for the field 2^448 - 2^224 - 1, implemented as U + U.  */
static void
ec_mul2_448 (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx)
{
  ec_addm_448 (w, u, u, ctx);
}

+static void +ec_pow2_448 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx) +{ + ec_mulm_448 (w, b, b, ctx); +} + +struct field_table { + const char *p; + + /* computation routines for the field. */ + void (* addm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx); + void (* subm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx); + void (* mulm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx); + void (* mul2) (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx); + void (* pow2) (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx); +}; + +static const struct field_table field_table[] = { + { + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + ec_addm_25519, + ec_subm_25519, + ec_mulm_25519, + ec_mul2_25519, + ec_pow2_25519 + }, + { + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + ec_addm_448, + ec_subm_448, + ec_mulm_448, + ec_mul2_448, + ec_pow2_448 + }, + { NULL, NULL, NULL, NULL, NULL, NULL }, +}; + +/* Force recomputation of all helper variables. */ +void +_gcry_mpi_ec_get_reset (mpi_ec_t ec) +{ + ec->t.valid.a_is_pminus3 = 0; + ec->t.valid.two_inv_p = 0; +} + + +/* Accessor for helper variable. */ +static int +ec_get_a_is_pminus3 (mpi_ec_t ec) +{ + gcry_mpi_t tmp; + + if (!ec->t.valid.a_is_pminus3) + { + ec->t.valid.a_is_pminus3 = 1; + tmp = mpi_alloc_like (ec->p); + mpi_sub_ui (tmp, ec->p, 3); + ec->t.a_is_pminus3 = !mpi_cmp (ec->a, tmp); + mpi_free (tmp); + } + + return ec->t.a_is_pminus3; +} + + +/* Accessor for helper variable. 
*/ +static gcry_mpi_t +ec_get_two_inv_p (mpi_ec_t ec) +{ + if (!ec->t.valid.two_inv_p) + { + ec->t.valid.two_inv_p = 1; + if (!ec->t.two_inv_p) + ec->t.two_inv_p = mpi_alloc (0); + ec_invm (ec->t.two_inv_p, mpi_const (MPI_C_TWO), ec); + } + return ec->t.two_inv_p; +} + + +static const char *const curve25519_bad_points[] = { + "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", + "0x0000000000000000000000000000000000000000000000000000000000000000", + "0x0000000000000000000000000000000000000000000000000000000000000001", + "0x00b8495f16056286fdb1329ceb8d09da6ac49ff1fae35616aeb8413b7c7aebe0", + "0x57119fd0dd4e22d8868e1c58c45c44045bef839c55b1d0b1248c50a3bc959c5f", + "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec", + "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee", + NULL +}; + + +static const char *const curve448_bad_points[] = { + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "0x00000000000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000000000", + "0x00000000000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000000001", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffe", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "00000000000000000000000000000000000000000000000000000000", + NULL +}; + +static const char *const *bad_points_table[] = { + curve25519_bad_points, + curve448_bad_points, +}; + +static gcry_mpi_t +scanval (const char *string) +{ + gpg_err_code_t rc; + gcry_mpi_t val; + + rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL); + if (rc) + log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc)); + return val; +} + + +/* This function initialized a context for elliptic curve based on the + field GF(p). 
P is the prime specifying this field, A is the first + coefficient. CTX is expected to be zeroized. */ +static void +ec_p_init (mpi_ec_t ctx, enum gcry_mpi_ec_models model, + enum ecc_dialects dialect, + int flags, + gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b) +{ + int i; + static int use_barrett; + + if (!use_barrett) + { + if (getenv ("GCRYPT_BARRETT")) + use_barrett = 1; + else + use_barrett = -1; + } + + /* Fixme: Do we want to check some constraints? e.g. a < p */ + + ctx->model = model; + ctx->dialect = dialect; + ctx->flags = flags; + ctx->nbits = mpi_get_nbits (p); + ctx->p = mpi_copy (p); + ctx->a = mpi_copy (a); + ctx->b = mpi_copy (b); + + ctx->t.p_barrett = use_barrett > 0? _gcry_mpi_barrett_init (ctx->p, 0):NULL; + + _gcry_mpi_ec_get_reset (ctx); + + if (model == MPI_EC_MONTGOMERY) + { + for (i=0; i< DIM(bad_points_table); i++) + { + gcry_mpi_t p_candidate = scanval (bad_points_table[i][0]); + int match_p = !mpi_cmp (ctx->p, p_candidate); + int j; + + mpi_free (p_candidate); + if (!match_p) + continue; + + for (j=0; i< DIM(ctx->t.scratch) && bad_points_table[i][j]; j++) + ctx->t.scratch[j] = scanval (bad_points_table[i][j]); + } + } + else + { + /* Allocate scratch variables. 
*/ + for (i=0; i< DIM(ctx->t.scratch); i++) + ctx->t.scratch[i] = mpi_alloc_like (ctx->p); + } + + ctx->addm = ec_addm; + ctx->subm = ec_subm; + ctx->mulm = ec_mulm; + ctx->mul2 = ec_mul2; + ctx->pow2 = ec_pow2; + + for (i=0; field_table[i].p; i++) + { + gcry_mpi_t f_p; + gpg_err_code_t rc; + + rc = _gcry_mpi_scan (&f_p, GCRYMPI_FMT_HEX, field_table[i].p, 0, NULL); + if (rc) + log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc)); + + if (!mpi_cmp (p, f_p)) + { + ctx->addm = field_table[i].addm; + ctx->subm = field_table[i].subm; + ctx->mulm = field_table[i].mulm; + ctx->mul2 = field_table[i].mul2; + ctx->pow2 = field_table[i].pow2; + _gcry_mpi_release (f_p); + + mpi_resize (ctx->a, ctx->p->nlimbs); + ctx->a->nlimbs = ctx->p->nlimbs; + + mpi_resize (ctx->b, ctx->p->nlimbs); + ctx->b->nlimbs = ctx->p->nlimbs; + + for (i=0; i< DIM(ctx->t.scratch) && ctx->t.scratch[i]; i++) + ctx->t.scratch[i]->nlimbs = ctx->p->nlimbs; + + break; + } + + _gcry_mpi_release (f_p); + } + + /* Prepare for fast reduction. */ + /* FIXME: need a test for NIST values. However it does not gain us + any real advantage, for 384 bits it is actually slower than using + mpi_mulm. */ +/* ctx->nist_nbits = mpi_get_nbits (ctx->p); */ +/* if (ctx->nist_nbits == 192) */ +/* { */ +/* for (i=0; i < 4; i++) */ +/* ctx->s[i] = mpi_new (192); */ +/* ctx->c = mpi_new (192*2); */ +/* } */ +/* else if (ctx->nist_nbits == 384) */ +/* { */ +/* for (i=0; i < 10; i++) */ +/* ctx->s[i] = mpi_new (384); */ +/* ctx->c = mpi_new (384*2); */ +/* } */ +} + + +static void +ec_deinit (void *opaque) +{ + mpi_ec_t ctx = opaque; + int i; + + _gcry_mpi_barrett_free (ctx->t.p_barrett); + + /* Domain parameter. */ + mpi_free (ctx->p); + mpi_free (ctx->a); + mpi_free (ctx->b); + _gcry_mpi_point_release (ctx->G); + mpi_free (ctx->n); + + /* The key. */ + _gcry_mpi_point_release (ctx->Q); + mpi_free (ctx->d); + + /* Private data of ec.c. 
*/ + mpi_free (ctx->t.two_inv_p); + + for (i=0; i< DIM(ctx->t.scratch); i++) + mpi_free (ctx->t.scratch[i]); + +/* if (ctx->nist_nbits == 192) */ +/* { */ +/* for (i=0; i < 4; i++) */ +/* mpi_free (ctx->s[i]); */ +/* mpi_free (ctx->c); */ +/* } */ +/* else if (ctx->nist_nbits == 384) */ +/* { */ +/* for (i=0; i < 10; i++) */ +/* mpi_free (ctx->s[i]); */ +/* mpi_free (ctx->c); */ +/* } */ +} + + +/* This function returns a new context for elliptic curve based on the + field GF(p). P is the prime specifying this field, A is the first + coefficient, B is the second coefficient, and MODEL is the model + for the curve. This function is only used within Libgcrypt and not + part of the public API. + + This context needs to be released using _gcry_mpi_ec_free. */ +mpi_ec_t +_gcry_mpi_ec_p_internal_new (enum gcry_mpi_ec_models model, + enum ecc_dialects dialect, + int flags, + gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b) +{ + mpi_ec_t ctx; + + ctx = xcalloc (1, sizeof *ctx); + ec_p_init (ctx, model, dialect, flags, p, a, b); + + return ctx; +} + + +/* This is a variant of _gcry_mpi_ec_p_internal_new which returns an + public context and does some error checking on the supplied + arguments. On success the new context is stored at R_CTX and 0 is + returned; on error NULL is stored at R_CTX and an error code is + returned. + + The context needs to be released using gcry_ctx_release. 
*/ +gpg_err_code_t +_gcry_mpi_ec_p_new (gcry_ctx_t *r_ctx, + enum gcry_mpi_ec_models model, + enum ecc_dialects dialect, + int flags, + gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b) +{ + gcry_ctx_t ctx; + mpi_ec_t ec; + + *r_ctx = NULL; + if (!p || !a) + return GPG_ERR_EINVAL; + + ctx = _gcry_ctx_alloc (CONTEXT_TYPE_EC, sizeof *ec, ec_deinit); + if (!ctx) + return gpg_err_code_from_syserror (); + ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + ec_p_init (ec, model, dialect, flags, p, a, b); + + *r_ctx = ctx; + return 0; +} + + +void +_gcry_mpi_ec_free (mpi_ec_t ctx) +{ + if (ctx) + { + ec_deinit (ctx); + xfree (ctx); + } +} + + +gcry_mpi_t +_gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy) +{ + mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + + return _gcry_ecc_get_mpi (name, ec, copy); +} + + +gcry_mpi_point_t +_gcry_mpi_ec_get_point (const char *name, gcry_ctx_t ctx, int copy) +{ + mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + + (void)copy; /* Not used. */ + + return _gcry_ecc_get_point (name, ec); +} + + +gpg_err_code_t +_gcry_mpi_ec_set_mpi (const char *name, gcry_mpi_t newvalue, + gcry_ctx_t ctx) +{ + mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + + return _gcry_ecc_set_mpi (name, newvalue, ec); +} + + +gpg_err_code_t +_gcry_mpi_ec_set_point (const char *name, gcry_mpi_point_t newvalue, + gcry_ctx_t ctx) +{ + mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + + return _gcry_ecc_set_point (name, newvalue, ec); +} + + +/* Given an encoded point in the MPI VALUE and a context EC, decode + * the point according to the context and store it in RESULT. On + * error an error code is return but RESULT might have been changed. + * If no context is given the function tries to decode VALUE by + * assuming a 0x04 prefixed uncompressed encoding. 
*/ +gpg_err_code_t +_gcry_mpi_ec_decode_point (mpi_point_t result, gcry_mpi_t value, mpi_ec_t ec) +{ + gpg_err_code_t rc; + + if (ec + && (ec->dialect == ECC_DIALECT_ED25519 + || (ec->model == MPI_EC_EDWARDS + && ec->dialect == ECC_DIALECT_SAFECURVE))) + rc = _gcry_ecc_eddsa_decodepoint (value, ec, result, NULL, NULL); + else if (ec && ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (value, ec, result); + else + rc = _gcry_ecc_sec_decodepoint (value, ec, result); + + return rc; +} + + +/* Compute the affine coordinates from the projective coordinates in + POINT. Set them into X and Y. If one coordinate is not required, + X or Y may be passed as NULL. CTX is the usual context. Returns: 0 + on success or !0 if POINT is at infinity. */ +int +_gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, mpi_point_t point, + mpi_ec_t ctx) +{ + if (!mpi_cmp_ui (point->z, 0)) + return -1; + + switch (ctx->model) + { + case MPI_EC_WEIERSTRASS: /* Using Jacobian coordinates. */ + { + gcry_mpi_t z1, z2, z3; + + z1 = mpi_new (0); + z2 = mpi_new (0); + ec_invm (z1, point->z, ctx); /* z1 = z^(-1) mod p */ + ec_mulm (z2, z1, z1, ctx); /* z2 = z^(-2) mod p */ + + if (x) + ec_mulm (x, point->x, z2, ctx); + + if (y) + { + z3 = mpi_new (0); + ec_mulm (z3, z2, z1, ctx); /* z3 = z^(-3) mod p */ + ec_mulm (y, point->y, z3, ctx); + mpi_free (z3); + } + + mpi_free (z2); + mpi_free (z1); + } + return 0; + + case MPI_EC_MONTGOMERY: + { + if (x) + mpi_set (x, point->x); + + if (y) + { + log_fatal ("%s: Getting Y-coordinate on %s is not supported\n", + "_gcry_mpi_ec_get_affine", "Montgomery"); + return -1; + } + } + return 0; + + case MPI_EC_EDWARDS: + { + gcry_mpi_t z; + + z = mpi_new (0); + ec_invm (z, point->z, ctx); + + mpi_resize (z, ctx->p->nlimbs); + z->nlimbs = ctx->p->nlimbs; + + if (x) + { + mpi_resize (x, ctx->p->nlimbs); + x->nlimbs = ctx->p->nlimbs; + ctx->mulm (x, point->x, z, ctx); + } + if (y) + { + mpi_resize (y, ctx->p->nlimbs); + y->nlimbs = ctx->p->nlimbs; + 
ctx->mulm (y, point->y, z, ctx); + } + + _gcry_mpi_release (z); + } + return 0; + + default: + return -1; + } +} + + + +/* RESULT = 2 * POINT (Weierstrass version). */ +static void +dup_point_weierstrass (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx) +{ +#define x3 (result->x) +#define y3 (result->y) +#define z3 (result->z) +#define t1 (ctx->t.scratch[0]) +#define t2 (ctx->t.scratch[1]) +#define t3 (ctx->t.scratch[2]) +#define l1 (ctx->t.scratch[3]) +#define l2 (ctx->t.scratch[4]) +#define l3 (ctx->t.scratch[5]) + + if (!mpi_cmp_ui (point->y, 0) || !mpi_cmp_ui (point->z, 0)) + { + /* P_y == 0 || P_z == 0 => [1:1:0] */ + mpi_set_ui (x3, 1); + mpi_set_ui (y3, 1); + mpi_set_ui (z3, 0); + } + else + { + if (ec_get_a_is_pminus3 (ctx)) /* Use the faster case. */ + { + /* L1 = 3(X - Z^2)(X + Z^2) */ + /* T1: used for Z^2. */ + /* T2: used for the right term. */ + ec_pow2 (t1, point->z, ctx); + ec_subm (l1, point->x, t1, ctx); + ec_mulm (l1, l1, mpi_const (MPI_C_THREE), ctx); + ec_addm (t2, point->x, t1, ctx); + ec_mulm (l1, l1, t2, ctx); + } + else /* Standard case. */ + { + /* L1 = 3X^2 + aZ^4 */ + /* T1: used for aZ^4. */ + ec_pow2 (l1, point->x, ctx); + ec_mulm (l1, l1, mpi_const (MPI_C_THREE), ctx); + ec_powm (t1, point->z, mpi_const (MPI_C_FOUR), ctx); + ec_mulm (t1, t1, ctx->a, ctx); + ec_addm (l1, l1, t1, ctx); + } + /* Z3 = 2YZ */ + ec_mulm (z3, point->y, point->z, ctx); + ec_mul2 (z3, z3, ctx); + + /* L2 = 4XY^2 */ + /* T2: used for Y2; required later. */ + ec_pow2 (t2, point->y, ctx); + ec_mulm (l2, t2, point->x, ctx); + ec_mulm (l2, l2, mpi_const (MPI_C_FOUR), ctx); + + /* X3 = L1^2 - 2L2 */ + /* T1: used for L2^2. */ + ec_pow2 (x3, l1, ctx); + ec_mul2 (t1, l2, ctx); + ec_subm (x3, x3, t1, ctx); + + /* L3 = 8Y^4 */ + /* T2: taken from above. 
*/ + ec_pow2 (t2, t2, ctx); + ec_mulm (l3, t2, mpi_const (MPI_C_EIGHT), ctx); + + /* Y3 = L1(L2 - X3) - L3 */ + ec_subm (y3, l2, x3, ctx); + ec_mulm (y3, y3, l1, ctx); + ec_subm (y3, y3, l3, ctx); + } + +#undef x3 +#undef y3 +#undef z3 +#undef t1 +#undef t2 +#undef t3 +#undef l1 +#undef l2 +#undef l3 +} + + +/* RESULT = 2 * POINT (Montgomery version). */ +static void +dup_point_montgomery (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx) +{ + (void)result; + (void)point; + (void)ctx; + log_fatal ("%s: %s not yet supported\n", + "_gcry_mpi_ec_dup_point", "Montgomery"); +} + + +/* RESULT = 2 * POINT (Twisted Edwards version). */ +static void +dup_point_edwards (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx) +{ +#define X1 (point->x) +#define Y1 (point->y) +#define Z1 (point->z) +#define X3 (result->x) +#define Y3 (result->y) +#define Z3 (result->z) +#define B (ctx->t.scratch[0]) +#define C (ctx->t.scratch[1]) +#define D (ctx->t.scratch[2]) +#define E (ctx->t.scratch[3]) +#define F (ctx->t.scratch[4]) +#define H (ctx->t.scratch[5]) +#define J (ctx->t.scratch[6]) + + /* Compute: (X_3 : Y_3 : Z_3) = 2( X_1 : Y_1 : Z_1 ) */ + + /* B = (X_1 + Y_1)^2 */ + ctx->addm (B, X1, Y1, ctx); + ctx->pow2 (B, B, ctx); + + /* C = X_1^2 */ + /* D = Y_1^2 */ + ctx->pow2 (C, X1, ctx); + ctx->pow2 (D, Y1, ctx); + + /* E = aC */ + if (ctx->dialect == ECC_DIALECT_ED25519) + ctx->subm (E, ctx->p, C, ctx); + else + ctx->mulm (E, ctx->a, C, ctx); + + /* F = E + D */ + ctx->addm (F, E, D, ctx); + + /* H = Z_1^2 */ + ctx->pow2 (H, Z1, ctx); + + /* J = F - 2H */ + ctx->mul2 (J, H, ctx); + ctx->subm (J, F, J, ctx); + + /* X_3 = (B - C - D) · J */ + ctx->subm (X3, B, C, ctx); + ctx->subm (X3, X3, D, ctx); + ctx->mulm (X3, X3, J, ctx); + + /* Y_3 = F · (E - D) */ + ctx->subm (Y3, E, D, ctx); + ctx->mulm (Y3, Y3, F, ctx); + + /* Z_3 = F · J */ + ctx->mulm (Z3, F, J, ctx); + +#undef X1 +#undef Y1 +#undef Z1 +#undef X3 +#undef Y3 +#undef Z3 +#undef B +#undef C +#undef D +#undef E +#undef F 
+#undef H +#undef J +} + + +/* RESULT = 2 * POINT */ +void +_gcry_mpi_ec_dup_point (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx) +{ + switch (ctx->model) + { + case MPI_EC_WEIERSTRASS: + dup_point_weierstrass (result, point, ctx); + break; + case MPI_EC_MONTGOMERY: + dup_point_montgomery (result, point, ctx); + break; + case MPI_EC_EDWARDS: + dup_point_edwards (result, point, ctx); + break; + } +} + + +/* RESULT = P1 + P2 (Weierstrass version).*/ +static void +add_points_weierstrass (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ +#define x1 (p1->x ) +#define y1 (p1->y ) +#define z1 (p1->z ) +#define x2 (p2->x ) +#define y2 (p2->y ) +#define z2 (p2->z ) +#define x3 (result->x) +#define y3 (result->y) +#define z3 (result->z) +#define l1 (ctx->t.scratch[0]) +#define l2 (ctx->t.scratch[1]) +#define l3 (ctx->t.scratch[2]) +#define l4 (ctx->t.scratch[3]) +#define l5 (ctx->t.scratch[4]) +#define l6 (ctx->t.scratch[5]) +#define l7 (ctx->t.scratch[6]) +#define l8 (ctx->t.scratch[7]) +#define l9 (ctx->t.scratch[8]) +#define t1 (ctx->t.scratch[9]) +#define t2 (ctx->t.scratch[10]) + + if ( (!mpi_cmp (x1, x2)) && (!mpi_cmp (y1, y2)) && (!mpi_cmp (z1, z2)) ) + { + /* Same point; need to call the duplicate function. */ + _gcry_mpi_ec_dup_point (result, p1, ctx); + } + else if (!mpi_cmp_ui (z1, 0)) + { + /* P1 is at infinity. */ + mpi_set (x3, p2->x); + mpi_set (y3, p2->y); + mpi_set (z3, p2->z); + } + else if (!mpi_cmp_ui (z2, 0)) + { + /* P2 is at infinity. 
*/ + mpi_set (x3, p1->x); + mpi_set (y3, p1->y); + mpi_set (z3, p1->z); + } + else + { + int z1_is_one = !mpi_cmp_ui (z1, 1); + int z2_is_one = !mpi_cmp_ui (z2, 1); + + /* l1 = x1 z2^2 */ + /* l2 = x2 z1^2 */ + if (z2_is_one) + mpi_set (l1, x1); + else + { + ec_pow2 (l1, z2, ctx); + ec_mulm (l1, l1, x1, ctx); + } + if (z1_is_one) + mpi_set (l2, x2); + else + { + ec_pow2 (l2, z1, ctx); + ec_mulm (l2, l2, x2, ctx); + } + /* l3 = l1 - l2 */ + ec_subm (l3, l1, l2, ctx); + /* l4 = y1 z2^3 */ + ec_powm (l4, z2, mpi_const (MPI_C_THREE), ctx); + ec_mulm (l4, l4, y1, ctx); + /* l5 = y2 z1^3 */ + ec_powm (l5, z1, mpi_const (MPI_C_THREE), ctx); + ec_mulm (l5, l5, y2, ctx); + /* l6 = l4 - l5 */ + ec_subm (l6, l4, l5, ctx); + + if (!mpi_cmp_ui (l3, 0)) + { + if (!mpi_cmp_ui (l6, 0)) + { + /* P1 and P2 are the same - use duplicate function. */ + _gcry_mpi_ec_dup_point (result, p1, ctx); + } + else + { + /* P1 is the inverse of P2. */ + mpi_set_ui (x3, 1); + mpi_set_ui (y3, 1); + mpi_set_ui (z3, 0); + } + } + else + { + /* l7 = l1 + l2 */ + ec_addm (l7, l1, l2, ctx); + /* l8 = l4 + l5 */ + ec_addm (l8, l4, l5, ctx); + /* z3 = z1 z2 l3 */ + ec_mulm (z3, z1, z2, ctx); + ec_mulm (z3, z3, l3, ctx); + /* x3 = l6^2 - l7 l3^2 */ + ec_pow2 (t1, l6, ctx); + ec_pow2 (t2, l3, ctx); + ec_mulm (t2, t2, l7, ctx); + ec_subm (x3, t1, t2, ctx); + /* l9 = l7 l3^2 - 2 x3 */ + ec_mul2 (t1, x3, ctx); + ec_subm (l9, t2, t1, ctx); + /* y3 = (l9 l6 - l8 l3^3)/2 */ + ec_mulm (l9, l9, l6, ctx); + ec_powm (t1, l3, mpi_const (MPI_C_THREE), ctx); /* fixme: Use saved value*/ + ec_mulm (t1, t1, l8, ctx); + ec_subm (y3, l9, t1, ctx); + ec_mulm (y3, y3, ec_get_two_inv_p (ctx), ctx); + } + } + +#undef x1 +#undef y1 +#undef z1 +#undef x2 +#undef y2 +#undef z2 +#undef x3 +#undef y3 +#undef z3 +#undef l1 +#undef l2 +#undef l3 +#undef l4 +#undef l5 +#undef l6 +#undef l7 +#undef l8 +#undef l9 +#undef t1 +#undef t2 +} + + +/* RESULT = P1 + P2 (Montgomery version).*/ +static void +add_points_montgomery (mpi_point_t 
result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ + (void)result; + (void)p1; + (void)p2; + (void)ctx; + log_fatal ("%s: %s not yet supported\n", + "_gcry_mpi_ec_add_points", "Montgomery"); +} + + +/* RESULT = P1 + P2 (Twisted Edwards version).*/ +static void +add_points_edwards (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ +#define X1 (p1->x) +#define Y1 (p1->y) +#define Z1 (p1->z) +#define X2 (p2->x) +#define Y2 (p2->y) +#define Z2 (p2->z) +#define X3 (result->x) +#define Y3 (result->y) +#define Z3 (result->z) +#define A (ctx->t.scratch[0]) +#define B (ctx->t.scratch[1]) +#define C (ctx->t.scratch[2]) +#define D (ctx->t.scratch[3]) +#define E (ctx->t.scratch[4]) +#define F (ctx->t.scratch[5]) +#define G (ctx->t.scratch[6]) +#define tmp (ctx->t.scratch[7]) + + point_resize (result, ctx); + + /* Compute: (X_3 : Y_3 : Z_3) = (X_1 : Y_1 : Z_1) + (X_2 : Y_2 : Z_3) */ + + /* A = Z1 · Z2 */ + ctx->mulm (A, Z1, Z2, ctx); + + /* B = A^2 */ + ctx->pow2 (B, A, ctx); + + /* C = X1 · X2 */ + ctx->mulm (C, X1, X2, ctx); + + /* D = Y1 · Y2 */ + ctx->mulm (D, Y1, Y2, ctx); + + /* E = d · C · D */ + ctx->mulm (E, ctx->b, C, ctx); + ctx->mulm (E, E, D, ctx); + + /* F = B - E */ + ctx->subm (F, B, E, ctx); + + /* G = B + E */ + ctx->addm (G, B, E, ctx); + + /* X_3 = A · F · ((X_1 + Y_1) · (X_2 + Y_2) - C - D) */ + ctx->addm (tmp, X1, Y1, ctx); + ctx->addm (X3, X2, Y2, ctx); + ctx->mulm (X3, X3, tmp, ctx); + ctx->subm (X3, X3, C, ctx); + ctx->subm (X3, X3, D, ctx); + ctx->mulm (X3, X3, F, ctx); + ctx->mulm (X3, X3, A, ctx); + + /* Y_3 = A · G · (D - aC) */ + if (ctx->dialect == ECC_DIALECT_ED25519) + { + ctx->addm (Y3, D, C, ctx); + } + else + { + ctx->mulm (Y3, ctx->a, C, ctx); + ctx->subm (Y3, D, Y3, ctx); + } + ctx->mulm (Y3, Y3, G, ctx); + ctx->mulm (Y3, Y3, A, ctx); + + /* Z_3 = F · G */ + ctx->mulm (Z3, F, G, ctx); + + +#undef X1 +#undef Y1 +#undef Z1 +#undef X2 +#undef Y2 +#undef Z2 +#undef X3 +#undef Y3 +#undef Z3 +#undef A +#undef B 
+#undef C +#undef D +#undef E +#undef F +#undef G +#undef tmp +} + + +/* Compute a step of Montgomery Ladder (only use X and Z in the point). + Inputs: P1, P2, and x-coordinate of DIF = P1 - P1. + Outputs: PRD = 2 * P1 and SUM = P1 + P2. */ +static void +montgomery_ladder (mpi_point_t prd, mpi_point_t sum, + mpi_point_t p1, mpi_point_t p2, gcry_mpi_t dif_x, + mpi_ec_t ctx) +{ + ctx->addm (sum->x, p2->x, p2->z, ctx); + ctx->subm (p2->z, p2->x, p2->z, ctx); + ctx->addm (prd->x, p1->x, p1->z, ctx); + ctx->subm (p1->z, p1->x, p1->z, ctx); + ctx->mulm (p2->x, p1->z, sum->x, ctx); + ctx->mulm (p2->z, prd->x, p2->z, ctx); + ctx->pow2 (p1->x, prd->x, ctx); + ctx->pow2 (p1->z, p1->z, ctx); + ctx->addm (sum->x, p2->x, p2->z, ctx); + ctx->subm (p2->z, p2->x, p2->z, ctx); + ctx->mulm (prd->x, p1->x, p1->z, ctx); + ctx->subm (p1->z, p1->x, p1->z, ctx); + ctx->pow2 (sum->x, sum->x, ctx); + ctx->pow2 (sum->z, p2->z, ctx); + ctx->mulm (prd->z, p1->z, ctx->a, ctx); /* CTX->A: (a-2)/4 */ + ctx->mulm (sum->z, sum->z, dif_x, ctx); + ctx->addm (prd->z, p1->x, prd->z, ctx); + ctx->mulm (prd->z, prd->z, p1->z, ctx); +} + + +/* RESULT = P1 + P2 */ +void +_gcry_mpi_ec_add_points (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ + switch (ctx->model) + { + case MPI_EC_WEIERSTRASS: + add_points_weierstrass (result, p1, p2, ctx); + break; + case MPI_EC_MONTGOMERY: + add_points_montgomery (result, p1, p2, ctx); + break; + case MPI_EC_EDWARDS: + add_points_edwards (result, p1, p2, ctx); + break; + } +} + + +/* RESULT = P1 - P2 (Weierstrass version).*/ +static void +sub_points_weierstrass (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ + (void)result; + (void)p1; + (void)p2; + (void)ctx; + log_fatal ("%s: %s not yet supported\n", + "_gcry_mpi_ec_sub_points", "Weierstrass"); +} + + +/* RESULT = P1 - P2 (Montgomery version).*/ +static void +sub_points_montgomery (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ + 
(void)result; + (void)p1; + (void)p2; + (void)ctx; + log_fatal ("%s: %s not yet supported\n", + "_gcry_mpi_ec_sub_points", "Montgomery"); +} + + +/* RESULT = P1 - P2 (Twisted Edwards version).*/ +static void +sub_points_edwards (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ + mpi_point_t p2i = _gcry_mpi_point_new (0); + point_set (p2i, p2); + ctx->subm (p2i->x, ctx->p, p2i->x, ctx); + add_points_edwards (result, p1, p2i, ctx); + _gcry_mpi_point_release (p2i); +} + + +/* RESULT = P1 - P2 */ +void +_gcry_mpi_ec_sub_points (mpi_point_t result, + mpi_point_t p1, mpi_point_t p2, + mpi_ec_t ctx) +{ + switch (ctx->model) + { + case MPI_EC_WEIERSTRASS: + sub_points_weierstrass (result, p1, p2, ctx); + break; + case MPI_EC_MONTGOMERY: + sub_points_montgomery (result, p1, p2, ctx); + break; + case MPI_EC_EDWARDS: + sub_points_edwards (result, p1, p2, ctx); + break; + } +} + + +/* Scalar point multiplication - the main function for ECC. If takes + an integer SCALAR and a POINT as well as the usual context CTX. + RESULT will be set to the resulting point. */ +void +_gcry_mpi_ec_mul_point (mpi_point_t result, + gcry_mpi_t scalar, mpi_point_t point, + mpi_ec_t ctx) +{ + gcry_mpi_t x1, y1, z1, k, h, yy; + unsigned int i, loops; + mpi_point_struct p1, p2, p1inv; + + if (ctx->model == MPI_EC_EDWARDS + || (ctx->model == MPI_EC_WEIERSTRASS + && mpi_is_secure (scalar))) + { + /* Simple left to right binary method. Algorithm 3.27 from + * {author={Hankerson, Darrel and Menezes, Alfred J. 
and Vanstone, Scott}, + * title = {Guide to Elliptic Curve Cryptography}, + * year = {2003}, isbn = {038795273X}, + * url = {http://www.cacr.math.uwaterloo.ca/ecc/}, + * publisher = {Springer-Verlag New York, Inc.}} */ + unsigned int nbits; + int j; + + if (mpi_cmp (scalar, ctx->p) >= 0) + nbits = mpi_get_nbits (scalar); + else + nbits = mpi_get_nbits (ctx->p); + + if (ctx->model == MPI_EC_WEIERSTRASS) + { + mpi_set_ui (result->x, 1); + mpi_set_ui (result->y, 1); + mpi_set_ui (result->z, 0); + } + else + { + mpi_set_ui (result->x, 0); + mpi_set_ui (result->y, 1); + mpi_set_ui (result->z, 1); + point_resize (point, ctx); + } + + if (mpi_is_secure (scalar)) + { + /* If SCALAR is in secure memory we assume that it is the + secret key we use constant time operation. */ + mpi_point_struct tmppnt; + + point_init (&tmppnt); + point_resize (result, ctx); + point_resize (&tmppnt, ctx); + for (j=nbits-1; j >= 0; j--) + { + _gcry_mpi_ec_dup_point (result, result, ctx); + _gcry_mpi_ec_add_points (&tmppnt, result, point, ctx); + point_swap_cond (result, &tmppnt, mpi_test_bit (scalar, j), ctx); + } + point_free (&tmppnt); + } + else + { + if (ctx->model == MPI_EC_EDWARDS) + { + point_resize (result, ctx); + point_resize (point, ctx); + } + + for (j=nbits-1; j >= 0; j--) + { + _gcry_mpi_ec_dup_point (result, result, ctx); + if (mpi_test_bit (scalar, j)) + _gcry_mpi_ec_add_points (result, result, point, ctx); + } + } + return; + } + else if (ctx->model == MPI_EC_MONTGOMERY) + { + unsigned int nbits; + int j; + mpi_point_struct p1_, p2_; + mpi_point_t q1, q2, prd, sum; + unsigned long sw; + mpi_size_t rsize; + int scalar_copied = 0; + + /* Compute scalar point multiplication with Montgomery Ladder. + Note that we don't use Y-coordinate in the points at all. + RESULT->Y will be filled by zero. 
*/ + + nbits = mpi_get_nbits (scalar); + point_init (&p1); + point_init (&p2); + point_init (&p1_); + point_init (&p2_); + mpi_set_ui (p1.x, 1); + mpi_free (p2.x); + p2.x = mpi_copy (point->x); + mpi_set_ui (p2.z, 1); + + if (mpi_is_opaque (scalar)) + { + const unsigned int pbits = ctx->nbits; + gcry_mpi_t a; + unsigned int n; + unsigned char *raw; + + scalar_copied = 1; + + raw = _gcry_mpi_get_opaque_copy (scalar, &n); + if ((n+7)/8 != (pbits+7)/8) + log_fatal ("scalar size (%d) != prime size (%d)\n", + (n+7)/8, (pbits+7)/8); + + reverse_buffer (raw, (n+7)/8); + if ((pbits % 8)) + raw[0] &= (1 << (pbits % 8)) - 1; + raw[0] |= (1 << ((pbits + 7) % 8)); + raw[(pbits+7)/8 - 1] &= (256 - ctx->h); + a = mpi_is_secure (scalar) ? mpi_snew (pbits): mpi_new (pbits); + _gcry_mpi_set_buffer (a, raw, (n+7)/8, 0); + xfree (raw); + + scalar = a; + } + + point_resize (&p1, ctx); + point_resize (&p2, ctx); + point_resize (&p1_, ctx); + point_resize (&p2_, ctx); + + mpi_resize (point->x, ctx->p->nlimbs); + point->x->nlimbs = ctx->p->nlimbs; + + q1 = &p1; + q2 = &p2; + prd = &p1_; + sum = &p2_; + + for (j=nbits-1; j >= 0; j--) + { + mpi_point_t t; + + sw = mpi_test_bit (scalar, j); + point_swap_cond (q1, q2, sw, ctx); + montgomery_ladder (prd, sum, q1, q2, point->x, ctx); + point_swap_cond (prd, sum, sw, ctx); + t = q1; q1 = prd; prd = t; + t = q2; q2 = sum; sum = t; + } + + mpi_clear (result->y); + sw = (nbits & 1); + point_swap_cond (&p1, &p1_, sw, ctx); + + rsize = p1.z->nlimbs; + MPN_NORMALIZE (p1.z->d, rsize); + if (rsize == 0) + { + mpi_set_ui (result->x, 1); + mpi_set_ui (result->z, 0); + } + else + { + z1 = mpi_new (0); + ec_invm (z1, p1.z, ctx); + ec_mulm (result->x, p1.x, z1, ctx); + mpi_set_ui (result->z, 1); + mpi_free (z1); + } + + point_free (&p1); + point_free (&p2); + point_free (&p1_); + point_free (&p2_); + if (scalar_copied) + _gcry_mpi_release (scalar); + return; + } + + x1 = mpi_alloc_like (ctx->p); + y1 = mpi_alloc_like (ctx->p); + h = mpi_alloc_like (ctx->p); 
+ k = mpi_copy (scalar); + yy = mpi_copy (point->y); + + if ( mpi_has_sign (k) ) + { + k->sign = 0; + ec_invm (yy, yy, ctx); + } + + if (!mpi_cmp_ui (point->z, 1)) + { + mpi_set (x1, point->x); + mpi_set (y1, yy); + } + else + { + gcry_mpi_t z2, z3; + + z2 = mpi_alloc_like (ctx->p); + z3 = mpi_alloc_like (ctx->p); + ec_mulm (z2, point->z, point->z, ctx); + ec_mulm (z3, point->z, z2, ctx); + ec_invm (z2, z2, ctx); + ec_mulm (x1, point->x, z2, ctx); + ec_invm (z3, z3, ctx); + ec_mulm (y1, yy, z3, ctx); + mpi_free (z2); + mpi_free (z3); + } + z1 = mpi_copy (mpi_const (MPI_C_ONE)); + + mpi_mul (h, k, mpi_const (MPI_C_THREE)); /* h = 3k */ + loops = mpi_get_nbits (h); + if (loops < 2) + { + /* If SCALAR is zero, the above mpi_mul sets H to zero and thus + LOOPs will be zero. To avoid an underflow of I in the main + loop we set LOOP to 2 and the result to (0,0,0). */ + loops = 2; + mpi_clear (result->x); + mpi_clear (result->y); + mpi_clear (result->z); + } + else + { + mpi_set (result->x, point->x); + mpi_set (result->y, yy); + mpi_set (result->z, point->z); + } + mpi_free (yy); yy = NULL; + + p1.x = x1; x1 = NULL; + p1.y = y1; y1 = NULL; + p1.z = z1; z1 = NULL; + point_init (&p2); + point_init (&p1inv); + + /* Invert point: y = p - y mod p */ + point_set (&p1inv, &p1); + ec_subm (p1inv.y, ctx->p, p1inv.y, ctx); + + for (i=loops-2; i > 0; i--) + { + _gcry_mpi_ec_dup_point (result, result, ctx); + if (mpi_test_bit (h, i) == 1 && mpi_test_bit (k, i) == 0) + { + point_set (&p2, result); + _gcry_mpi_ec_add_points (result, &p2, &p1, ctx); + } + if (mpi_test_bit (h, i) == 0 && mpi_test_bit (k, i) == 1) + { + point_set (&p2, result); + _gcry_mpi_ec_add_points (result, &p2, &p1inv, ctx); + } + } + + point_free (&p1); + point_free (&p2); + point_free (&p1inv); + mpi_free (h); + mpi_free (k); +} + + +/* Return true if POINT is on the curve described by CTX. 
*/ +int +_gcry_mpi_ec_curve_point (gcry_mpi_point_t point, mpi_ec_t ctx) +{ + int res = 0; + gcry_mpi_t x, y, w; + + x = mpi_new (0); + y = mpi_new (0); + w = mpi_new (0); + + /* Check that the point is in range. This needs to be done here and + * not after conversion to affine coordinates. */ + if (mpi_cmpabs (point->x, ctx->p) >= 0) + goto leave; + if (mpi_cmpabs (point->y, ctx->p) >= 0) + goto leave; + if (mpi_cmpabs (point->z, ctx->p) >= 0) + goto leave; + + switch (ctx->model) + { + case MPI_EC_WEIERSTRASS: + { + gcry_mpi_t xxx; + + if (_gcry_mpi_ec_get_affine (x, y, point, ctx)) + goto leave; + + xxx = mpi_new (0); + + /* y^2 == x^3 + a·x + b */ + ec_pow2 (y, y, ctx); + + ec_pow3 (xxx, x, ctx); + ec_mulm (w, ctx->a, x, ctx); + ec_addm (w, w, ctx->b, ctx); + ec_addm (w, w, xxx, ctx); + + if (!mpi_cmp (y, w)) + res = 1; + + _gcry_mpi_release (xxx); + } + break; + case MPI_EC_MONTGOMERY: + { +#define xx y + /* With Montgomery curve, only X-coordinate is valid. */ + if (_gcry_mpi_ec_get_affine (x, NULL, point, ctx)) + goto leave; + + /* The equation is: b * y^2 == x^3 + a · x^2 + x */ + /* We check if right hand is quadratic residue or not by + Euler's criterion. 
*/ + /* CTX->A has (a-2)/4 and CTX->B has b^-1 */ + ec_mulm (w, ctx->a, mpi_const (MPI_C_FOUR), ctx); + ec_addm (w, w, mpi_const (MPI_C_TWO), ctx); + ec_mulm (w, w, x, ctx); + ec_pow2 (xx, x, ctx); + ec_addm (w, w, xx, ctx); + ec_addm (w, w, mpi_const (MPI_C_ONE), ctx); + ec_mulm (w, w, x, ctx); + ec_mulm (w, w, ctx->b, ctx); +#undef xx + /* Compute Euler's criterion: w^(p-1)/2 */ +#define p_minus1 y + ec_subm (p_minus1, ctx->p, mpi_const (MPI_C_ONE), ctx); + mpi_rshift (p_minus1, p_minus1, 1); + ec_powm (w, w, p_minus1, ctx); + + res = !mpi_cmp_ui (w, 1); +#undef p_minus1 + } + break; + case MPI_EC_EDWARDS: + { + if (_gcry_mpi_ec_get_affine (x, y, point, ctx)) + goto leave; + + mpi_resize (w, ctx->p->nlimbs); + w->nlimbs = ctx->p->nlimbs; + + /* a · x^2 + y^2 - 1 - b · x^2 · y^2 == 0 */ + ctx->pow2 (x, x, ctx); + ctx->pow2 (y, y, ctx); + if (ctx->dialect == ECC_DIALECT_ED25519) + ctx->subm (w, ctx->p, x, ctx); + else + ctx->mulm (w, ctx->a, x, ctx); + ctx->addm (w, w, y, ctx); + ctx->mulm (x, x, y, ctx); + ctx->mulm (x, x, ctx->b, ctx); + ctx->subm (w, w, x, ctx); + if (!mpi_cmp_ui (w, 1)) + res = 1; + } + break; + } + + leave: + _gcry_mpi_release (w); + _gcry_mpi_release (x); + _gcry_mpi_release (y); + + return res; +} + + +int +_gcry_mpi_ec_bad_point (gcry_mpi_point_t point, mpi_ec_t ctx) +{ + int i; + gcry_mpi_t x_bad; + + for (i = 0; (x_bad = ctx->t.scratch[i]); i++) + if (!mpi_cmp (point->x, x_bad)) + return 1; + + return 0; +} diff --git a/comm/third_party/libgcrypt/mpi/generic/distfiles b/comm/third_party/libgcrypt/mpi/generic/distfiles new file mode 100644 index 0000000000..649e829b7e --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/distfiles @@ -0,0 +1,10 @@ +mpih-add1.c +mpih-mul1.c +mpih-mul2.c +mpih-mul3.c +mpih-lshift.c +mpih-rshift.c +mpih-sub1.c +udiv-w-sdiv.c +mpi-asm-defs.h + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpi-asm-defs.h b/comm/third_party/libgcrypt/mpi/generic/mpi-asm-defs.h new file mode 100644 index 
0000000000..e607806e10 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpi-asm-defs.h @@ -0,0 +1,8 @@ +/* This file defines some basic constants for the MPI machinery. + * AMD64 compiled for the x32 ABI is special and thus we can't use the + * standard values for this ABI. */ +#if __GNUC__ >= 3 && defined(__x86_64__) && defined(__ILP32__) +#define BYTES_PER_MPI_LIMB 8 +#else +#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG) +#endif diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-add1.c b/comm/third_party/libgcrypt/mpi/generic/mpih-add1.c new file mode 100644 index 0000000000..4a84df64d8 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-add1.c @@ -0,0 +1,65 @@ +/* mpihelp-add_1.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, + * 2000, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +_gcry_mpih_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = y < cy; /* get out carry from that addition */ + y += x; /* add other addend */ + cy += y < x; /* get out carry from that add, combine */ + res_ptr[j] = y; + } + while ( ++j ); + + return cy; +} + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-lshift.c b/comm/third_party/libgcrypt/mpi/generic/mpih-lshift.c new file mode 100644 index 0000000000..f48c12cd02 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-lshift.c @@ -0,0 +1,68 @@ +/* mpi-lshift.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + * and store the USIZE least significant digits of the result at WP. + * Return the bits shifted out from the most significant digit. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MPI_LIMB + * 2. If the result is to be written over the input, WP must be >= UP. + */ + +mpi_limb_t +_gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned int cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp += 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while ( --i >= 0 ) + { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} + + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-mul1.c b/comm/third_party/libgcrypt/mpi/generic/mpih-mul1.c new file mode 100644 index 0000000000..0e8197d88a --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-mul1.c @@ -0,0 +1,62 @@ +/* mpihelp-mul_1.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt.
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +_gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. 
*/ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb ); + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb?1:0) + prod_high; + res_ptr[j] = prod_low; + } + while( ++j ); + + return cy_limb; +} + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-mul2.c b/comm/third_party/libgcrypt/mpi/generic/mpih-mul2.c new file mode 100644 index 0000000000..3b7549605d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-mul2.c @@ -0,0 +1,68 @@ +/* mpih-mul2.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + + +mpi_limb_t +_gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb ); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb?1:0) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += prod_low < x?1:0; + res_ptr[j] = prod_low; + } + while ( ++j ); + + return cy_limb; +} + + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-mul3.c b/comm/third_party/libgcrypt/mpi/generic/mpih-mul3.c new file mode 100644 index 0000000000..5e84f94f31 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-mul3.c @@ -0,0 +1,68 @@ +/* mpih-mul3.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + + +mpi_limb_t +_gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do + { + umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb?1:0) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += prod_low > x?1:0; + res_ptr[j] = prod_low; + } + while( ++j ); + + return cy_limb; +} + + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-rshift.c b/comm/third_party/libgcrypt/mpi/generic/mpih-rshift.c new file mode 100644 index 0000000000..e40794fcf2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-rshift.c @@ -0,0 +1,67 @@ +/* mpih-rshift.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + * 2000, 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + * and store the USIZE least significant limbs of the result at WP. + * The bits shifted out to the right are returned. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MPI_LIMB + * 2. If the result is to be written over the input, WP must be <= UP.
+ */ + +mpi_limb_t +_gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp -= 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + for (i=1; i < usize; i++) + { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; + + return retval; +} + diff --git a/comm/third_party/libgcrypt/mpi/generic/mpih-sub1.c b/comm/third_party/libgcrypt/mpi/generic/mpih-sub1.c new file mode 100644 index 0000000000..e88821bfb4 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/mpih-sub1.c @@ -0,0 +1,66 @@ +/* mpihelp-add_2.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +_gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do + { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = y < cy; /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy += y > x; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } + while( ++j ); + + return cy; +} + + diff --git a/comm/third_party/libgcrypt/mpi/generic/udiv-w-sdiv.c b/comm/third_party/libgcrypt/mpi/generic/udiv-w-sdiv.c new file mode 100644 index 0000000000..e80d98bc54 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/generic/udiv-w-sdiv.c @@ -0,0 +1,133 @@ +/* mpih-w-sdiv -- implement udiv_qrnnd on machines with only signed + * division. + * Copyright (C) 1992, 1994, 1996, 1998, 2002 Free Software Foundation, Inc. + * Contributed by Peter L. Montgomery. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + + +#if 0 /* not yet ported to MPI */ + +mpi_limb_t +mpihelp_udiv_w_sdiv( mpi_limp_t *rp, + mpi_limp_t *a1, + mpi_limp_t *a0, + mpi_limp_t *d ) +{ + mp_limb_t q, r; + mp_limb_t c0, c1, b1; + + if ((mpi_limb_signed_t) d >= 0) + { + if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1))) + { + /* dividend, divisor, and quotient are nonnegative */ + sdiv_qrnnd (q, r, a1, a0, d); + } + else + { + /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */ + sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1)); + /* Divide (c1*2^32 + c0) by d */ + sdiv_qrnnd (q, r, c1, c0, d); + /* Add 2^31 to quotient */ + q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1); + } + } + else + { + b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */ + c1 = a1 >> 1; /* A/2 */ + c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1); + + if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */ + { + sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */ + + r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */ + if ((d & 1) != 0) + { + if (r >= q) + r = r - q; + else if (q - r <= d) + { + r = r - q + d; + q--; + } + else + { + r = r - q + 2*d; + q -= 2; + } + } + } + else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */ + { + c1 = (b1 - 1) - c1; + c0 = ~c0; /* logical NOT */ + + sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */ + + q = ~q; /* (A/2)/b1 */ + r = (b1 - 1) - r; + + r = 2*r + (a0 & 1); /* A/(2*b1) */ + + if ((d & 1) != 0) + { 
+ if (r >= q) + r = r - q; + else if (q - r <= d) + { + r = r - q + d; + q--; + } + else + { + r = r - q + 2*d; + q -= 2; + } + } + } + else /* Implies c1 = b1 */ + { /* Hence a1 = d - 1 = 2*b1 - 1 */ + if (a0 >= -d) + { + q = -1; + r = a0 + d; + } + else + { + q = -2; + r = a0 + 2*d; + } + } + } + + *rp = r; + return q; +} + +#endif + diff --git a/comm/third_party/libgcrypt/mpi/hppa/README b/comm/third_party/libgcrypt/mpi/hppa/README new file mode 100644 index 0000000000..5a2d5fd970 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa/README @@ -0,0 +1,84 @@ +This directory contains mpn functions for various HP PA-RISC chips. Code +that runs faster on the PA7100 and later implementations, is in the pa7100 +directory. + +RELEVANT OPTIMIZATION ISSUES + + Load and Store timing + +On the PA7000 no memory instructions can issue the two cycles after a store. +For the PA7100, this is reduced to one cycle. + +The PA7100 has a lookup-free cache, so it helps to schedule loads and the +dependent instruction really far from each other. + +STATUS + +1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the + instructions below (but some sw pipelining is needed to avoid the + xmpyu-fstds delay): + + fldds s1_ptr + + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + + addc + stws res_ptr + addc + stws res_ptr + + addib Loop + +2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb + (asymptotically) on the PA7100, using the instructions below. With proper + sw pipelining and the unrolling level below, the speed becomes 8 + cycles/limb.
+ + fldds s1_ptr + fldds s1_ptr + + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + xmpyu + fstds N(%r30) + + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + ldws N(%r30) + addc + addc + addc + addc + addc %r0,%r0,cy-limb + + ldws res_ptr + ldws res_ptr + ldws res_ptr + ldws res_ptr + add + stws res_ptr + addc + stws res_ptr + addc + stws res_ptr + addc + stws res_ptr + + addib diff --git a/comm/third_party/libgcrypt/mpi/hppa/distfiles b/comm/third_party/libgcrypt/mpi/hppa/distfiles new file mode 100644 index 0000000000..7f24205d34 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa/distfiles @@ -0,0 +1,7 @@ +README +udiv-qrnnd.S +mpih-add1.S +mpih-sub1.S +mpih-lshift.S +mpih-rshift.S + diff --git a/comm/third_party/libgcrypt/mpi/hppa/mpih-add1.S b/comm/third_party/libgcrypt/mpi/hppa/mpih-add1.S new file mode 100644 index 0000000000..3bc0e5e196 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa/mpih-add1.S @@ -0,0 +1,70 @@ +/* hppa add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (gr26) + * mpi_ptr_t s1_ptr, (gr25) + * mpi_ptr_t s2_ptr, (gr24) + * mpi_size_t size) (gr23) + * + * One might want to unroll this as for other processors, but it turns + * out that the data cache contention after a store makes such + * unrolling useless. We can't come under 5 cycles/limb anyway. + */ + + .code + .export _gcry_mpih_add_n + .label _gcry_mpih_add_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + add %r20,%r19,%r28 ; add first limbs ignoring cy + + .label L$loop + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,<> -1,%r23,L$loop + addc %r20,%r19,%r28 + + .label L$end + stws %r28,0(0,%r26) + bv 0(%r2) + addc %r0,%r0,%r28 + + .exit + .procend diff --git a/comm/third_party/libgcrypt/mpi/hppa/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/hppa/mpih-lshift.S new file mode 100644 index 0000000000..91b29bb6e7 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa/mpih-lshift.S @@ -0,0 +1,77 @@ +/* hppa lshift + * + * Copyright (C) 1992, 1994, 1998 + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (gr26) + * mpi_ptr_t up, (gr25) + * mpi_size_t usize, (gr24) + * unsigned cnt) (gr23) + */ + + .code + .export _gcry_mpih_lshift + .label _gcry_mpih_lshift + .proc + .callinfo frame=64,no_calls + .entry + + sh2add %r24,%r25,%r25 + sh2add %r24,%r26,%r26 + ldws,mb -4(0,%r25),%r22 + subi 32,%r23,%r1 + mtsar %r1 + addib,= -1,%r24,L$0004 + vshd %r0,%r22,%r28 ; compute carry out limb + ldws,mb -4(0,%r25),%r29 + addib,= -1,%r24,L$0002 + vshd %r22,%r29,%r20 + + .label L$loop + ldws,mb -4(0,%r25),%r22 + stws,mb %r20,-4(0,%r26) + addib,= -1,%r24,L$0003 + vshd %r29,%r22,%r20 + ldws,mb -4(0,%r25),%r29 + stws,mb %r20,-4(0,%r26) + addib,<> -1,%r24,L$loop + vshd %r22,%r29,%r20 + + .label L$0002 + stws,mb %r20,-4(0,%r26) + vshd %r29,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) + .label L$0003 + stws,mb %r20,-4(0,%r26) + .label L$0004 + vshd %r22,%r0,%r20 + bv 0(%r2) + stw %r20,-4(0,%r26) + + .exit + .procend + + + diff --git a/comm/third_party/libgcrypt/mpi/hppa/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/hppa/mpih-rshift.S new file mode 100644 index 0000000000..37a9d4ef92 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa/mpih-rshift.S @@ -0,0 +1,73 @@ +/* 
hppa rshift + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (gr26) + * mpi_ptr_t up, (gr25) + * mpi_size_t usize, (gr24) + * unsigned cnt) (gr23) + */ + + .code + .export _gcry_mpih_rshift + .label _gcry_mpih_rshift + .proc + .callinfo frame=64,no_calls + .entry + + ldws,ma 4(0,%r25),%r22 + mtsar %r23 + addib,= -1,%r24,L$r004 + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,= -1,%r24,L$r002 + vshd %r29,%r22,%r20 + + .label L$roop + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,= -1,%r24,L$r003 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,<> -1,%r24,L$roop + vshd %r29,%r22,%r20 + + .label L$r002 + stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + .label L$r003 + stws,ma %r20,4(0,%r26) + .label L$r004 + vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend + diff --git a/comm/third_party/libgcrypt/mpi/hppa/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/hppa/mpih-sub1.S new file mode 100644 index 0000000000..8d197e412a --- /dev/null +++ 
b/comm/third_party/libgcrypt/mpi/hppa/mpih-sub1.S @@ -0,0 +1,78 @@ +/* hppa sub_n -- Subtract two limb vectors of the same length > 0 and store + * difference in a third limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (gr26) + * mpi_ptr_t s1_ptr, (gr25) + * mpi_ptr_t s2_ptr, (gr24) + * mpi_size_t size) (gr23) + * + * One might want to unroll this as for other processors, but it turns + * out that the data cache contention after a store makes such + * unrolling useless. We can't come under 5 cycles/limb anyway. 
+ */ + + + .code + .export _gcry_mpih_sub_n + .label _gcry_mpih_sub_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + sub %r20,%r19,%r28 ; subtract first limbs ignoring cy + + .label L$loop + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,<> -1,%r23,L$loop + subb %r20,%r19,%r28 + + .label L$end + stws %r28,0(0,%r26) + addc %r0,%r0,%r28 + bv 0(%r2) + subi 1,%r28,%r28 + + .exit + .procend + + + diff --git a/comm/third_party/libgcrypt/mpi/hppa/udiv-qrnnd.S b/comm/third_party/libgcrypt/mpi/hppa/udiv-qrnnd.S new file mode 100644 index 0000000000..59ebf7a002 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa/udiv-qrnnd.S @@ -0,0 +1,297 @@ +/* HP-PA __udiv_qrnnd division support, used from longlong.h. + * This version runs fast on pre-PA7000 CPUs. + * + * Copyright (C) 1993, 1994, 1998, 2001, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +/* INPUT PARAMETERS + * rem_ptr gr26 + * n1 gr25 + * n0 gr24 + * d gr23 + * + * The code size is a bit excessive. We could merge the last two ds;addc + * sequences by simply moving the "bb,< Odd" instruction down. The only + * trouble is the FFFFFFFF code that would need some hacking. + */ + + .code + .export __udiv_qrnnd + .label __udiv_qrnnd + .proc + .callinfo frame=0,no_calls + .entry + + comb,< %r23,0,L$largedivisor + sub %r0,%r23,%r1 ; clear cy as side-effect + ds %r0,%r1,%r0 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds 
%r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r28 + ds %r25,%r23,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r23,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r28,%r28,%r28 + + .label L$largedivisor + extru %r24,31,1,%r19 ; r19 = n0 & 1 + bb,< %r23,31,L$odd + extru %r23,30,31,%r22 ; r22 = d >> 1 + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r24,%r24,%r28 + + .label L$odd + addib,sv,n 1,%r22,L$FF.. 
; r22 = (d / 2 + 1) + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r28 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 +; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25 + add,nuv %r28,%r25,%r25 + addl %r25,%r1,%r25 + addc %r0,%r28,%r28 + sub,<< %r25,%r23,%r0 + addl %r25,%r1,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r0,%r28,%r28 + +; This is just a special case of the code above. +; We come here when d == 0xFFFFFFFF + .label L$FF.. 
+ add,uv %r25,%r24,%r24 + sub,<< %r24,%r23,%r0 + ldo 1(%r24),%r24 + stws %r24,0(0,%r26) + bv 0(%r2) + addc %r0,%r25,%r28 + + .exit + .procend diff --git a/comm/third_party/libgcrypt/mpi/hppa1.1/distfiles b/comm/third_party/libgcrypt/mpi/hppa1.1/distfiles new file mode 100644 index 0000000000..d68227ac70 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa1.1/distfiles @@ -0,0 +1,5 @@ +udiv-qrnnd.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S + diff --git a/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul1.S new file mode 100644 index 0000000000..45926dd7b5 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul1.S @@ -0,0 +1,115 @@ +/* hppa1.1 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1993, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (r26) + * mpi_ptr_t s1_ptr, (r25) + * mpi_size_t s1_size, (r24) + * mpi_limb_t s2_limb) (r23) + * + * + * + * This runs at 9 cycles/limb on a PA7000. With the used instructions, it can + * not become faster due to data cache contention after a store. On the + * PA7100 it runs at 7 cycles/limb, and that can not be improved either, since + * only the xmpyu does not need the integer pipeline, so the only dual-issue + * we will get are addc+xmpyu. Unrolling would not help either CPU. + * + * We could use fldds to read two limbs at a time from the S1 array, and that + * could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and + * PA7100, respectively. We don't do that since it does not seem worth the + * (alignment) troubles... + * + * At least the PA7100 is rumored to be able to deal with cache-misses + * without stalling instruction issue. If this is true, and the cache is + * actually also lockup-free, we should use a deeper software pipeline, and + * load from S1 very early! (The loads and stores to -12(sp) will surely be + * in the cache.) + */ + + .level 1.1 + + .code + .export _gcry_mpih_mul_1 + .label _gcry_mpih_mul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... 
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop + .label L$loop + fldws,ma 4(%r25),%fr5 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + + .label L$end + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .label L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + fstds %fr6,-16(%r30) + ldw -16(%r30),%r28 + ldo -64(%r30),%r30 + bv 0(%r2) + fstws %fr6R,0(%r26) + + .exit + .procend + + diff --git a/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul2.S new file mode 100644 index 0000000000..1047ab5649 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul2.S @@ -0,0 +1,117 @@ +/* hppa1.1 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1993, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (r26) + * mpi_ptr_t s1_ptr, (r25) + * mpi_size_t s1_size, (r24) + * mpi_limb_t s2_limb) (r23) + * + * This runs at 11 cycles/limb on a PA7000. With the used instructions, it + * can not become faster due to data cache contention after a store. On the + * PA7100 it runs at 10 cycles/limb, and that can not be improved either, + * since only the xmpyu does not need the integer pipeline, so the only + * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb + * on the PA7100. + * + * Some of the ideas described in mpih-mul1.S apply to this code too. + */ + + .level 1.1 + + .code + .export _gcry_mpih_addmul_1 + .label _gcry_mpih_addmul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... 
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop + .label L$loop + ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + + .label L$end + ldw 0(%r26),%r29 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + add %r29,%r19,%r19 + stws,ma %r19,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .label L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + add %r29,%r1,%r19 + stw %r19,0(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .exit + .procend + + + diff --git a/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul3.S new file mode 100644 index 0000000000..632adf1eec --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa1.1/mpih-mul3.S @@ -0,0 +1,126 @@ +/* hppa1.1 submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1993, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (r26) + * mpi_ptr_t s1_ptr, (r25) + * mpi_size_t s1_size, (r24) + * mpi_limb_t s2_limb) (r23) + * + * + * This runs at 12 cycles/limb on a PA7000. With the used instructions, it + * can not become faster due to data cache contention after a store. On the + * PA7100 it runs at 11 cycles/limb, and that can not be improved either, + * since only the xmpyu does not need the integer pipeline, so the only + * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb + * on the PA7100. + * + * Some of the ideas described in mpih-mul1.S apply to this code too. + * + * It seems possible to make this run as fast as addmul_1, if we use + * sub,>>= %r29,%r19,%r22 + * addi 1,%r28,%r28 + * but that requires reworking the hairy software pipeline... + */ + + .level 1.1 + + .code + .export _gcry_mpih_submul_1 + .label _gcry_mpih_submul_1 + .proc + .callinfo frame=64,no_calls + .entry + + ldo 64(%r30),%r30 + fldws,ma 4(%r25),%fr5 + stw %r23,-16(%r30) ; move s2_limb ... + addib,= -1,%r24,L$just_one_limb + fldws -16(%r30),%fr4 ; ... 
into fr4 + add %r0,%r0,%r0 ; clear carry + xmpyu %fr4,%fr5,%fr6 + fldws,ma 4(%r25),%fr7 + fstds %fr6,-16(%r30) + xmpyu %fr4,%fr7,%fr8 + ldw -12(%r30),%r19 ; least significant limb in product + ldw -16(%r30),%r28 + + fstds %fr8,-16(%r30) + addib,= -1,%r24,L$end + ldw -12(%r30),%r1 + +; Main loop + .label L$loop + ldws 0(%r26),%r29 + fldws,ma 4(%r25),%fr5 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 + stws,ma %r22,4(%r26) + addc %r28,%r1,%r19 + xmpyu %fr4,%fr5,%fr6 + ldw -16(%r30),%r28 + fstds %fr6,-16(%r30) + addc %r0,%r28,%r28 + addib,<> -1,%r24,L$loop + ldw -12(%r30),%r1 + + .label L$end + ldw 0(%r26),%r29 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 + stws,ma %r22,4(%r26) + addc %r28,%r1,%r19 + ldw -16(%r30),%r28 + ldws 0(%r26),%r29 + addc %r0,%r28,%r28 + sub %r29,%r19,%r22 + add %r22,%r19,%r0 + stws,ma %r22,4(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .label L$just_one_limb + xmpyu %fr4,%fr5,%fr6 + ldw 0(%r26),%r29 + fstds %fr6,-16(%r30) + ldw -12(%r30),%r1 + ldw -16(%r30),%r28 + sub %r29,%r1,%r22 + add %r22,%r1,%r0 + stw %r22,0(%r26) + addc %r0,%r28,%r28 + bv 0(%r2) + ldo -64(%r30),%r30 + + .exit + .procend + diff --git a/comm/third_party/libgcrypt/mpi/hppa1.1/udiv-qrnnd.S b/comm/third_party/libgcrypt/mpi/hppa1.1/udiv-qrnnd.S new file mode 100644 index 0000000000..3f28b7b64d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/hppa1.1/udiv-qrnnd.S @@ -0,0 +1,92 @@ +/* HP-PA __udiv_qrnnd division support, used from longlong.h. + * This version runs fast on PA 7000 and later. + * + * Copyright (C) 1993, 1994, 1998, + * 2001, 2002, 2004 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +/* INPUT PARAMETERS + * rem_ptr gr26 + * n1 gr25 + * n0 gr24 + * d gr23 + */ + + .level 1.1 + + .data + .align 8 + .label L$0000 + .word 0x43f00000 + .word 0x0 + .code + .export __udiv_qrnnd + .label __udiv_qrnnd + .proc + .callinfo frame=64,no_calls + .entry + ldo 64(%r30),%r30 + + stws %r25,-16(0,%r30) ; n_hi + stws %r24,-12(0,%r30) ; n_lo + stw %r19,-32(%r30) + addil LT%L$0000,%r19 + ldw RT%L$0000(%r1),%r1 + fldds -16(0,%r30),%fr5 + stws %r23,-12(0,%r30) + comib,<= 0,%r25,L$1 + fcnvxf,dbl,dbl %fr5,%fr5 + fldds 0(0,%r1),%fr4 + fadd,dbl %fr4,%fr5,%fr5 + .label L$1 + fcpy,sgl %fr0,%fr6L + fldws -12(0,%r30),%fr6R + fcnvxf,dbl,dbl %fr6,%fr4 + + fdiv,dbl %fr5,%fr4,%fr5 + + fcnvfx,dbl,dbl %fr5,%fr4 + fstws %fr4R,-16(%r30) + xmpyu %fr4R,%fr6R,%fr6 + ldws -16(%r30),%r28 + fstds %fr6,-16(0,%r30) + ldws -12(0,%r30),%r21 + ldws -16(0,%r30),%r20 + sub %r24,%r21,%r22 + subb %r25,%r20,%r1 + comib,= 0,%r1,L$2 + ldo -64(%r30),%r30 + + add %r22,%r23,%r22 + ldo -1(%r28),%r28 + .label L$2 + bv 0(%r2) + stws %r22,0(0,%r26) + + .exit + .procend + diff --git a/comm/third_party/libgcrypt/mpi/i386/distfiles b/comm/third_party/libgcrypt/mpi/i386/distfiles new file mode 100644 index 
0000000000..88d2a30c7d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/distfiles @@ -0,0 +1,9 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-lshift.S +mpih-rshift.S +mpih-sub1.S +syntax.h + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-add1.S b/comm/third_party/libgcrypt/mpi/i386/mpih-add1.S new file mode 100644 index 0000000000..de78a0cb1d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-add1.S @@ -0,0 +1,161 @@ +/* i80386 add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_add_n) +C_SYMBOL_NAME(_gcry_mpih_add_n:) + CFI_STARTPROC() + pushl %edi + CFI_PUSH(%edi) + pushl %esi + CFI_PUSH(%esi) + + movl 12(%esp),%edi /* res_ptr */ + movl 16(%esp),%esi /* s1_ptr */ + movl 20(%esp),%edx /* s2_ptr */ + movl 24(%esp),%ecx /* size */ + +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + + movl %ecx,%eax + shrl $3,%ecx /* compute count for unrolled loop */ + negl %eax + andl $7,%eax /* get index where to start loop */ + jz Loop /* necessary special case for 0 */ + incl %ecx /* adjust loop count */ + shll $2,%eax /* adjustment for pointers... */ + subl %eax,%edi /* ... since they are offset ... */ + subl %eax,%esi /* ... by a constant when we ... */ + subl %eax,%edx /* ... enter the loop */ + shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif +#ifdef PIC +/* Calculate start address in loop for PIC. Due to limitations in some + assemblers, Loop-L0-3 cannot be put into the leal */ + call L0 + CFI_ADJUST_CFA_OFFSET(4) +L0: leal (%eax,%eax,8),%eax + addl (%esp),%eax + addl $(Loop-L0-3),%eax + addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) +#else +/* Calculate start address in loop for non-PIC. 
*/ + leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ +#endif + jmp *%eax /* jump into loop */ + ALIGN (3) +Loop: movl (%esi),%eax + adcl (%edx),%eax + movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 4(%esi),%eax + adcl 4(%edx),%eax + movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 8(%esi),%eax + adcl 8(%edx),%eax + movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 12(%esi),%eax + adcl 12(%edx),%eax + movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 16(%esi),%eax + adcl 16(%edx),%eax + movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 20(%esi),%eax + adcl 20(%edx),%eax + movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 24(%esi),%eax + adcl 24(%edx),%eax + movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 28(%esi),%eax + adcl 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz Loop + + sbbl %eax,%eax + negl %eax + +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + + popl %esi + CFI_POP(%esi) + popl %edi + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/i386/mpih-lshift.S new file mode 100644 index 0000000000..55da0678d0 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-lshift.S @@ -0,0 +1,102 @@ +/* i80386 lshift + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift:) + CFI_STARTPROC() + pushl %edi + CFI_PUSH(%edi) + pushl %esi + CFI_PUSH(%esi) + pushl %ebx + CFI_PUSH(%ebx) + + movl 16(%esp),%edi /* res_ptr */ + movl 20(%esp),%esi /* s_ptr */ + movl 24(%esp),%edx /* size */ + movl 28(%esp),%ecx /* cnt */ + + subl $4,%esi /* adjust s_ptr */ + + movl (%esi,%edx,4),%ebx /* read most significant limb */ + xorl %eax,%eax + shldl %cl,%ebx,%eax /* compute carry limb */ + decl %edx + jz Lend + pushl %eax /* push carry limb onto stack */ + testb $1,%dl + jnz L1 /* enter loop in the middle */ + movl %ebx,%eax + + ALIGN (3) +Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */ + shldl %cl,%ebx,%eax /* compute result limb */ + movl %eax,(%edi,%edx,4) /* store it */ + decl %edx +L1: movl (%esi,%edx,4),%eax + shldl %cl,%eax,%ebx + movl %ebx,(%edi,%edx,4) + decl %edx + jnz Loop + + shll %cl,%eax /* compute least significant limb */ + 
movl %eax,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebx + popl %esi + popl %edi + ret + +Lend: shll %cl,%ebx /* compute least significant limb */ + movl %ebx,(%edi) /* store it */ + + popl %ebx + CFI_POP(%ebx) + popl %esi + CFI_POP(%esi) + popl %edi + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/i386/mpih-mul1.S new file mode 100644 index 0000000000..9679ea6224 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-mul1.S @@ -0,0 +1,94 @@ +/* i80386 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * Multiply the s1_size limbs at s1_ptr by s2_limb, store the low result limbs at res_ptr and return the final carry limb in %eax. */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) +C_SYMBOL_NAME(_gcry_mpih_mul_1:) + + CFI_STARTPROC() + INSN1(push,l ,R(edi)) + CFI_PUSH(%edi) + INSN1(push,l ,R(esi)) + CFI_PUSH(%esi) + INSN1(push,l ,R(ebx)) + CFI_PUSH(%ebx) + INSN1(push,l ,R(ebp)) + CFI_PUSH(%ebp) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* argument offsets are 16 higher than in the prototype because of the four pushes above */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both pointers just past their last limb ... */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) /* ... and index with a counter running from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx holds the carry limb, initially 0 */ + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + INSN2(add,l ,R(eax),R(ebx)) /* add the carry limb of the previous iteration */ + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(eax)) /* store low half; mov leaves the carry flag for the adc below */ + INSN2(adc,l ,R(edx),$0) /* propagate the carry into the high half */ + INSN2(mov,l ,R(ebx),R(edx)) /* high half becomes the next carry limb */ + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) /* return the last carry limb */ + + INSN1(pop,l ,R(ebp)) + CFI_POP(%ebp) + INSN1(pop,l ,R(ebx)) + CFI_POP(%ebx) + INSN1(pop,l ,R(esi)) + CFI_POP(%esi) + INSN1(pop,l ,R(edi)) + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/i386/mpih-mul2.S new file mode 100644 index 0000000000..fe4129c435 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-mul2.S @@ -0,0 +1,96 @@ +/* i80386 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * Multiply the s1_size limbs at s1_ptr by s2_limb, add the products to the limbs at res_ptr in place and return the final carry limb in %eax. */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) +C_SYMBOL_NAME(_gcry_mpih_addmul_1:) + + CFI_STARTPROC() + INSN1(push,l ,R(edi)) + CFI_PUSH(%edi) + INSN1(push,l ,R(esi)) + CFI_PUSH(%esi) + INSN1(push,l ,R(ebx)) + CFI_PUSH(%ebx) + INSN1(push,l ,R(ebp)) + CFI_PUSH(%ebp) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* argument offsets are 16 higher than in the prototype because of the four pushes above */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both pointers just past their last limb ... */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) /* ... and index with a counter running from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx holds the carry limb, initially 0 */ + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + INSN2(add,l ,R(eax),R(ebx)) /* add the carry limb of the previous iteration */ + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,MEM_INDEX(res_ptr,size,4),R(eax)) /* accumulate the low half into the result limb in memory */ + INSN2(adc,l ,R(edx),$0) /* a carry out of the accumulation also goes into the high half */ + INSN2(mov,l ,R(ebx),R(edx)) /* high half becomes the next carry limb */ + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) /* return the last carry limb */ + + INSN1(pop,l ,R(ebp)) + CFI_POP(%ebp) + INSN1(pop,l ,R(ebx)) + CFI_POP(%ebx) + INSN1(pop,l ,R(esi)) + CFI_POP(%esi) + INSN1(pop,l ,R(edi)) + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/i386/mpih-mul3.S new file mode 100644 index 0000000000..87577d54ca --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-mul3.S @@ -0,0 +1,96 @@ +/* i80386 submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * Multiply the s1_size limbs at s1_ptr by s2_limb, subtract the products from the limbs at res_ptr in place and return the final carry (borrow) limb in %eax. */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) +C_SYMBOL_NAME(_gcry_mpih_submul_1:) + + CFI_STARTPROC() + INSN1(push,l ,R(edi)) + CFI_PUSH(%edi) + INSN1(push,l ,R(esi)) + CFI_PUSH(%esi) + INSN1(push,l ,R(ebx)) + CFI_PUSH(%ebx) + INSN1(push,l ,R(ebp)) + CFI_PUSH(%ebp) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* argument offsets are 16 higher than in the prototype because of the four pushes above */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both pointers just past their last limb ... */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) /* ... and index with a counter running from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx holds the carry limb, initially 0 */ + ALIGN (3) +Loop: + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + INSN2(add,l ,R(eax),R(ebx)) /* add the carry limb of the previous iteration */ + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,MEM_INDEX(res_ptr,size,4),R(eax)) /* subtract the low half from the result limb in memory */ + INSN2(adc,l ,R(edx),$0) /* a borrow from the subtraction is counted into the high half */ + INSN2(mov,l ,R(ebx),R(edx)) /* high half becomes the next carry limb */ + + INSN1(inc,l ,R(size)) + INSN1(jnz, ,Loop) + INSN2(mov,l ,R(eax),R(ebx)) /* return the last carry limb */ + + INSN1(pop,l ,R(ebp)) + CFI_POP(%ebp) + INSN1(pop,l ,R(ebx)) + CFI_POP(%ebx) + INSN1(pop,l ,R(esi)) + CFI_POP(%esi) + INSN1(pop,l ,R(edi)) + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/i386/mpih-rshift.S new file mode 100644 index 0000000000..35a8201f35 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-rshift.S @@ -0,0 +1,105 @@ +/* i80386 rshift + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * Shift the usize limbs at up right by cnt bits, store them at wp, and return in %eax the bits shifted out of the least significant limb. */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_rshift) +C_SYMBOL_NAME(_gcry_mpih_rshift:) + CFI_STARTPROC() + pushl %edi + CFI_PUSH(%edi) + pushl %esi + CFI_PUSH(%esi) + pushl %ebx + CFI_PUSH(%ebx) + + movl 16(%esp),%edi /* wp */ + movl 20(%esp),%esi /* up */ + movl 24(%esp),%edx /* usize */ + movl 28(%esp),%ecx /* cnt */ + + leal -4(%edi,%edx,4),%edi /* address of the last limb of wp */ + leal (%esi,%edx,4),%esi /* address just past the last limb of up */ + negl %edx /* index runs from -usize up to 0 */ + + movl (%esi,%edx,4),%ebx /* read least significant limb */ + xorl %eax,%eax + shrdl %cl,%ebx,%eax /* compute carry limb */ + incl %edx + jz Lend2 /* usize was 1: only the single result limb is left to store */ + pushl %eax /* push carry limb onto stack; NOTE(review): this push has no CFI_ADJUST_CFA_OFFSET annotation */ + testb $1,%dl + jnz L2 /* enter loop in the middle */ + movl %ebx,%eax + + ALIGN (3) +Loop2: movl (%esi,%edx,4),%ebx /* load next higher limb */ + shrdl %cl,%ebx,%eax /* compute result limb */ + movl %eax,(%edi,%edx,4) /* store it */ + incl %edx +L2: movl (%esi,%edx,4),%eax /* same step with the roles of %eax and %ebx swapped */ + shrdl %cl,%eax,%ebx + movl %ebx,(%edi,%edx,4) + incl %edx + jnz Loop2 + + shrl %cl,%eax /* compute most significant limb */ + movl %eax,(%edi) /* store it */ + + popl %eax /* pop carry limb into the return register */ + + popl %ebx /* NOTE(review): unlike the Lend2 exit below, the pops on this exit path carry no CFI_POP annotations */ + popl %esi + popl %edi + ret + +Lend2: shrl %cl,%ebx /* compute most significant limb */ + movl %ebx,(%edi) /* store it */ + + popl %ebx + CFI_POP(%ebx) + popl %esi + CFI_POP(%esi) + popl %edi + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/i386/mpih-sub1.S new file mode 100644 index 0000000000..2bdc143866 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/mpih-sub1.S @@ -0,0 +1,162 @@ +/* i80386 sub_n -- Subtract two limb vectors of the same length > 0 and store + * difference in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. 
+ * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + * Subtract the size limbs at s2_ptr from those at s1_ptr, store the difference at res_ptr and return the final borrow (0 or 1) in %eax. */ + + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) +C_SYMBOL_NAME(_gcry_mpih_sub_n:) + CFI_STARTPROC() + pushl %edi + CFI_PUSH(%edi) + pushl %esi + CFI_PUSH(%esi) + + movl 12(%esp),%edi /* res_ptr */ + movl 16(%esp),%esi /* s1_ptr */ + movl 20(%esp),%edx /* s2_ptr */ + movl 24(%esp),%ecx /* size */ + +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + + movl %ecx,%eax + shrl $3,%ecx /* compute count for unrolled loop */ + negl %eax + andl $7,%eax /* get index where to start loop */ + jz Loop /* necessary special case for 0 */ + incl %ecx /* adjust loop count */ + shll $2,%eax /* adjustment for pointers... 
*/ + subl %eax,%edi /* ... since they are offset ... */ + subl %eax,%esi /* ... by a constant when we ... */ + subl %eax,%edx /* ... enter the loop */ + shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif +#ifdef PIC +/* Calculate start address in loop for PIC. Due to limitations in some + assemblers, Loop-L0-3 cannot be put into the leal */ + call L0 + CFI_ADJUST_CFA_OFFSET(4) +L0: leal (%eax,%eax,8),%eax /* start index * 9; presumably each unrolled step is 9 bytes of code -- TODO confirm encoding */ + addl (%esp),%eax /* add the address of L0 pushed by the call */ + addl $(Loop-L0-3),%eax + addl $4,%esp /* drop the pushed address */ + CFI_ADJUST_CFA_OFFSET(-4) +#else +/* Calculate start address in loop for non-PIC. */ + leal (Loop - 3)(%eax,%eax,8),%eax /* Loop - 3 + start index * 9; presumably 9 bytes of code per unrolled step -- TODO confirm encoding */ +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ +#endif + jmp *%eax /* jump into loop; NOTE(review): the first sbbl expects the carry flag clear here -- presumably the address arithmetic above never carries, confirm */ + ALIGN (3) +Loop: movl (%esi),%eax + sbbl (%edx),%eax + movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 4(%esi),%eax + sbbl 4(%edx),%eax + movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 8(%esi),%eax + sbbl 8(%edx),%eax + movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 12(%esi),%eax + sbbl 12(%edx),%eax + movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 16(%esi),%eax + sbbl 16(%edx),%eax + movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 20(%esi),%eax + sbbl 20(%edx),%eax + movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 24(%esi),%eax + sbbl 24(%edx),%eax + movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif + movl 28(%esi),%eax + sbbl 28(%edx),%eax + movl %eax,28(%edi) + leal 32(%edi),%edi /* leal and decl leave the carry flag intact for the next sbbl */ + leal 32(%esi),%esi + leal 32(%edx),%edx + decl %ecx + jnz Loop + + sbbl %eax,%eax /* eax = 0 or -1 depending on the final borrow */ + negl %eax /* return the borrow as 0 or 1 */ + +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + + popl %esi + CFI_POP(%esi) + popl %edi + CFI_POP(%edi) + ret + CFI_ENDPROC() + diff --git a/comm/third_party/libgcrypt/mpi/i386/syntax.h b/comm/third_party/libgcrypt/mpi/i386/syntax.h new 
file mode 100644 index 0000000000..dd30031995 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i386/syntax.h @@ -0,0 +1,94 @@ +/* syntax.h -- Definitions for x86 syntax variations. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + +#include <config.h> + +#ifdef __i386__ +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES /* assembler understands .cfi_*: emit real unwind directives */ +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_RESTORE(reg) .cfi_restore reg + +# define CFI_PUSH(reg) \ + CFI_ADJUST_CFA_OFFSET(4); CFI_REL_OFFSET(reg, 0) /* annotation for a 4-byte push of reg */ +# define CFI_POP(reg) \ + CFI_ADJUST_CFA_OFFSET(-4); CFI_RESTORE(reg) /* annotation for a 4-byte pop of reg */ +#else /* no CFI support: every CFI_* macro expands to nothing */ +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_RESTORE(reg) + +# define CFI_PUSH(reg) +# define CFI_POP(reg) +#endif /* HAVE_GCC_ASM_CFI_DIRECTIVES */ +#endif /* __i386__ */ + +#undef ALIGN /* drop any earlier definition; ALIGN is redefined per syntax below */ + +#if defined (BSD_SYNTAX) || defined (ELF_SYNTAX) /* AT&T style: %reg names, disp(base,index,scale), operands emitted as src,dst */ +#define R(r) %r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)displacement(R(base)) +#define MEM_INDEX(base,index,size)(R(base),R(index),size) +#ifdef __STDC__ +#define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst +#else /* preprocessor without ##: an empty comment is used to glue mnemonic and suffix */ +#define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst +#endif +#define TEXT .text +#if defined (BSD_SYNTAX) +#define ALIGN(log) .align log +#endif +#if defined (ELF_SYNTAX) +#define ALIGN(log) .align 1<<(log) /* pass 2^log to .align instead of log itself */ +#endif +#define GLOBL .globl +#endif + +#ifdef INTEL_SYNTAX /* Intel style: bare register names, [base+index*size], operands emitted as dst,src, no size suffix */ +#define R(r) r +#define MEM(base)[base] +#define MEM_DISP(base,displacement)[base+(displacement)] +#define MEM_INDEX(base,index,size)[base+index*size] +#define INSN1(mnemonic,size_suffix,dst)mnemonic dst +#define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src +#define TEXT .text +#define ALIGN(log) .align log +#define GLOBL .globl +#endif + +#ifdef X86_BROKEN_ALIGN /* override: give .align an explicit 0x90 fill byte (the x86 NOP opcode) */ +#undef ALIGN +#define ALIGN(log) .align log,0x90 +#endif diff --git a/comm/third_party/libgcrypt/mpi/i586/README b/comm/third_party/libgcrypt/mpi/i586/README new file mode 
100644 index 0000000000..d73b082684 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/README @@ -0,0 +1,26 @@ +This directory contains mpn functions optimized for Intel Pentium +processors. + +RELEVANT OPTIMIZATION ISSUES + +1. Pentium doesn't allocate cache lines on writes, unlike most other modern +processors. Since the functions in the mpn class do array writes, we have to +handle allocating the destination cache lines by reading a word from them in the +loops, to achieve the best performance. + +2. Pairing of memory operations requires that the two issued operations refer +to different cache banks. The simplest way to ensure this is to read/write +two words from the same object. If we make operations on different objects, +they might or might not be to the same cache bank. + +STATUS + +1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium +documentation indicates that they should take only 43/8 = 5.375 cycles/limb, +or 5 cycles/limb asymptotically. + +2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop +overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb. + +3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they +should... diff --git a/comm/third_party/libgcrypt/mpi/i586/distfiles b/comm/third_party/libgcrypt/mpi/i586/distfiles new file mode 100644 index 0000000000..8f821fbfb4 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/distfiles @@ -0,0 +1,9 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-lshift.S +mpih-rshift.S +mpih-sub1.S +README + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-add1.S b/comm/third_party/libgcrypt/mpi/i586/mpih-add1.S new file mode 100644 index 0000000000..7436d59268 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-add1.S @@ -0,0 +1,135 @@ +/* i80586 add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. 
+ * + * Copyright (C) 1992, 1994, 1995, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + * Add the size limbs at s1_ptr and s2_ptr, store the sum at res_ptr and return the final carry (0 or 1) in %eax. */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_add_n) +C_SYMBOL_NAME(_gcry_mpih_add_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx /* preload the first s2 limb */ + + decl %ecx /* size - 1: the last limb is added separately at Lend2 */ + movl %ecx,%edx + shrl $3,%ecx /* number of 8-limb unrolled iterations */ + andl $7,%edx /* limbs left over for Loop2 */ + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx /* save the leftover count; %edx is scratch inside Loop */ + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebx,%eax + movl 4(%ebp),%ebx + adcl %ebx,%edx + movl 8(%ebp),%ebx /* preload the s2 limb for the next pair */ + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebx,%eax + movl 12(%ebp),%ebx + adcl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 
16(%esi),%eax + movl 20(%esi),%edx + adcl %ebx,%eax + movl 20(%ebp),%ebx + adcl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %ebx,%eax + movl 28(%ebp),%ebx + adcl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi /* leal and decl leave the carry flag intact for the next adcl */ + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx /* restore the leftover count */ +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 /* no leftover limbs: go straight to the final limb */ + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebx,%eax + movl 4(%ebp),%ebx /* preload the next s2 limb */ + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax /* final limb, using the s2 limb already preloaded in %ebx */ + adcl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax /* eax = 0 or -1 depending on the final carry */ + negl %eax /* return the carry as 0 or 1 */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/i586/mpih-lshift.S new file mode 100644 index 0000000000..9d25fe9d7b --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-lshift.S @@ -0,0 +1,229 @@ +/* i80586 lshift + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * Shift the usize limbs at up left by cnt bits, store them at wp, and return in %eax the bits shifted out of the most significant limb. */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift:) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. 
*/ + cmp $1,%ecx + jne Lnormal + leal 4(%esi),%eax + cmpl %edi,%eax + jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */ + leal (%esi,%ebp,4),%eax + cmpl %eax,%edi + jnc Lspecial /* jump if res_ptr >= s_ptr + size */ + +Lnormal: + leal -4(%edi,%ebp,4),%edi /* start at the most significant limb of both vectors */ + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp /* save size - 1 for the tail loop */ + shrl $3,%ebp /* number of 8-limb unrolled iterations */ + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + shldl %cl,%ebx,%edx + shldl %cl,%eax,%ebx + movl %edx,-8(%edi) + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + shldl %cl,%edx,%eax + shldl %cl,%ebx,%edx + movl %eax,-16(%edi) + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp /* limbs left over for Loop2 */ + jz Lend2 +Loop2: movl (%esi),%eax + shldl %cl,%eax,%edx + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shll %cl,%edx /* compute least significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. 
+*/ + +Lspecial: + movl (%esi),%edx + addl $4,%esi + + decl %ebp + pushl %ebp /* save size - 1 for the tail loop */ + shrl $3,%ebp /* number of 8-limb unrolled iterations */ + + addl %edx,%edx /* shift the first limb left by one; the carry flag holds the bit shifted out */ + incl %ebp + decl %ebp /* incl/decl pair sets the zero flag from %ebp without touching the carry flag */ + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax /* limb + limb + carry: shift left by one with the carry shifted in */ + movl %ebx,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + adcl %ebx,%ebx + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebx,%ebx + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebx,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi /* use leal not to clobber carry */ + leal 32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp /* limbs left over for LLoop2 */ + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + adcl %edx,%edx + movl %ebx,(%edi) + + leal 4(%esi),%esi /* use leal not to clobber carry */ + leal 4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1 +LLend2: addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + sbbl %eax,%eax /* eax = 0 or -1 depending on the final carry */ + negl %eax /* return the shifted-out bit as 0 or 1 */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/i586/mpih-mul1.S new file mode 100644 index 0000000000..3601d968be --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-mul1.S @@ -0,0 +1,89 @@ +/* i80586 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * Multiply the s1_size limbs at s1_ptr by s2_limb, store the low result limbs at res_ptr and return the final carry limb in %eax. */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) +C_SYMBOL_NAME(_gcry_mpih_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) /* argument offsets are 16 higher than in the prototype because of the four pushes above */ + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* point both pointers just past their last limb ... */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) /* ... and index with a counter running from -size up to 0 */ + INSN2(xor,l ,R(ebx),R(ebx)) /* carry limb = 0; xor also clears the carry flag for the first adc */ + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) /* fold in the carry flag left by the add of the previous iteration */ + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb * s2_limb */ + + INSN2(add,l ,R(ebx),R(eax)) /* ebx = carry limb + low half */ + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) /* inc sets the zero flag for jnz but leaves the carry flag alone */ + + INSN2(mov,l ,R(ebx),R(edx)) /* high half becomes the next carry limb */ + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) /* fold in the carry flag of the last add */ + INSN2(mov,l ,R(eax),R(ebx)) /* return the last carry limb */ + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/i586/mpih-mul2.S new file mode 100644 index 0000000000..f32d363a7d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-mul2.S @@ -0,0 +1,93 @@ +/* i80586 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) +C_SYMBOL_NAME(_gcry_mpih_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) /* four registers saved (16 bytes): the arguments listed at sp+4..sp+16 are now at esp+20..esp+32 */ + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* both pointers are advanced by size limbs ... */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) /* ... and then indexed with a negative count that runs up to zero */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx holds the carry limb, initially 0; xor also clears the carry flag */ + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) /* fold the carry flag of the previous add into the carry limb */ + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb times s2_limb */ + + INSN2(add,l ,R(eax),R(ebx)) /* low product word plus carry limb */ + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) /* carry of that sum goes into the high product word */ + INSN2(add,l ,R(ebx),R(eax)) /* accumulate into the res limb */ + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) /* high product word becomes the next carry limb */ + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) /* final carry limb goes to eax before returning */ + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/i586/mpih-mul3.S new file mode 100644 index 0000000000..fa27d4e1a5 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-mul3.S @@ -0,0 +1,93 @@ +/* i80586 submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) +C_SYMBOL_NAME(_gcry_mpih_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) /* four registers saved (16 bytes): the arguments listed at sp+4..sp+16 are now at esp+20..esp+32 */ + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) /* both pointers are advanced by size limbs ... */ + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) /* ... and then indexed with a negative count that runs up to zero */ + INSN2(xor,l ,R(ebx),R(ebx)) /* ebx holds the carry limb, initially 0; xor also clears the carry flag */ + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) /* fold the borrow flag of the previous sub into the carry limb */ + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) /* edx:eax = s1 limb times s2_limb */ + + INSN2(add,l ,R(eax),R(ebx)) /* low product word plus carry limb */ + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) /* carry of that sum goes into the high product word */ + INSN2(sub,l ,R(ebx),R(eax)) /* subtract from the res limb */ + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) /* high product word becomes the next carry limb */ + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) /* final carry limb goes to eax before returning */ + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/i586/mpih-rshift.S new file mode 100644 index 0000000000..c661e3d3b9 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-rshift.S @@ -0,0 +1,228 @@ +/* i80586 rshift + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_rshift) +C_SYMBOL_NAME(_gcry_mpih_rshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. 
*/ + cmp $1,%ecx + jne Rnormal + leal 4(%edi),%eax + cmpl %esi,%eax + jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */ + leal (%edi,%ebp,4),%eax + cmpl %eax,%esi + jnc Rspecial /* jump if s_ptr >= res_ptr + size */ + +Rnormal: + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp /* keep size-1 for the tail loop */ + shrl $3,%ebp /* number of 8-limb blocks */ + jz Rend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Roop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + shrdl %cl,%ebx,%edx + shrdl %cl,%eax,%ebx + movl %edx,8(%edi) + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + shrdl %cl,%edx,%eax + shrdl %cl,%ebx,%edx + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebp + jnz Roop + +Rend: popl %ebp + andl $7,%ebp /* limbs left over after the 8-limb blocks */ + jz Rend2 +Roop2: movl (%esi),%eax + shrdl %cl,%eax,%edx /* compute result limb */ + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebp + jnz Roop2 + +Rend2: shrl %cl,%edx /* compute most significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from most significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. 
+*/ + +Rspecial: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebp + pushl %ebp /* keep size-1 for the tail loop */ + shrl $3,%ebp /* number of 8-limb blocks */ + + shrl $1,%edx + incl %ebp + decl %ebp /* incl/decl pair: test %ebp for zero w/o clobbering carry */ + jz RLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +RLoop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl $1,%eax + movl %ebx,(%edi) + rcrl $1,%edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + rcrl $1,%ebx + movl %edx,-8(%edi) + rcrl $1,%eax + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + rcrl $1,%edx + movl %eax,-16(%edi) + rcrl $1,%ebx + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl $1,%eax + movl %ebx,-24(%edi) + rcrl $1,%edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi /* use leal not to clobber carry */ + leal -32(%edi),%edi + decl %ebp + jnz RLoop + +RLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz RLend2 + addl %eax,%eax /* restore carry from eax */ +RLoop2: movl %edx,%ebx + movl (%esi),%edx + rcrl $1,%edx + movl %ebx,(%edi) + + leal -4(%esi),%esi /* use leal not to clobber carry */ + leal -4(%edi),%edi + decl %ebp + jnz RLoop2 + + jmp RL1 +RLend2: addl %eax,%eax /* restore carry from eax */ +RL1: movl %edx,(%edi) /* store last limb */ + + movl $0,%eax + rcrl $1,%eax /* rotate the last shifted-out bit into the top of the returned limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + diff --git a/comm/third_party/libgcrypt/mpi/i586/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/i586/mpih-sub1.S new file mode 100644 index 0000000000..ef2d580743 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/i586/mpih-sub1.S @@ -0,0 +1,142 @@ +/* i80586 sub_n -- Subtract two limb vectors of the same length > 0 and store + * the difference in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) +C_SYMBOL_NAME(_gcry_mpih_sub_n:) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx /* preload the first s2 limb */ + + decl %ecx /* size-1: the last limb is handled at Lend2 */ + movl %ecx,%edx + shrl $3,%ecx /* number of 8-limb blocks */ + andl $7,%edx /* leftover limbs for Loop2 */ + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebx,%eax + movl 4(%ebp),%ebx + sbbl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebx,%eax + movl 12(%ebp),%ebx + sbbl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebx,%eax + movl 20(%ebp),%ebx + sbbl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + sbbl %ebx,%eax + movl 28(%ebp),%ebx + sbbl %ebx,%edx + movl 32(%ebp),%ebx /* preload the s2 limb for the next block or the tail */ + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + sbbl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax /* eax = 0 or -1 according to the final borrow */ + negl %eax /* hand back the borrow as 0 or 1 */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + diff --git a/comm/third_party/libgcrypt/mpi/longlong.h 
b/comm/third_party/libgcrypt/mpi/longlong.h new file mode 100644 index 0000000000..6bb6bedfb9 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/longlong.h @@ -0,0 +1,1801 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + Note: This is the Libgcrypt version + + +Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, + 2000, 2001, 2002, 2003, 2004, 2011 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with this file; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* You have to define the following before including this file: + + UWtype -- An unsigned type, default type for operations (typically a "word") + UHWtype -- An unsigned type, at least half the size of UWtype. + UDWtype -- An unsigned type, at least twice as large as UWtype. + W_TYPE_SIZE -- size in bits of UWtype + + SItype, USItype -- Signed and unsigned 32 bit types. + DItype, UDItype -- Signed and unsigned 64 bit types. + + On a 32 bit machine UWtype should typically be USItype; + on a 64 bit machine, UWtype should typically be UDItype. 
+*/ + +#define __BITS4 (W_TYPE_SIZE / 4) /* a quarter of the word size, in bits */ +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) /* 2^(W_TYPE_SIZE/2), the half-word base */ +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) /* low half of the bits of T */ +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* high half of the bits of T */ + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +# define __MPN(x) __##x +#endif + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two + UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two-word + UWtype product in HIGH_PROD and LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + UDWtype product. This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a UDWtype, composed by the UWtype integers + HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + than DENOMINATOR for correct operation. If, in addition, the most + significant bit of DENOMINATOR must be 1, then the pre-processor symbol + UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The quotient + is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from the + msb to the first non-zero bit in the UWtype X. This is the number of + steps X needs to be shifted left to set the msb. Undefined for X == 0, + unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + + 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + from the least significant end. + + 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by + HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + respectively. 
The result is placed in HIGH_SUM and LOW_SUM. Overflow + (i.e. carry out) is not stored anywhere, and is lost. + + 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. */ + +/* The CPUs come in alphabetical order below. + + Please add support for more CPUs here, or improve the current support + for the CPUs below! */ + +#ifdef __riscos__ +#pragma continue_after_hash_error +#else /* !__riscos__ */ +#if defined (__GNUC__) && !defined (NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. */ +#if __GNUC__ < 2 +# define __CLOBBER_CC +# define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +# define __CLOBBER_CC : "cc" +# define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +/*************************************** + **** Begin CPU Specific Versions **** + ***************************************/ + +/*************************************** + ************** A29K ***************** + ***************************************/ +#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5\n" \ + "addc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5\n" \ + "subc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + 
"rI" ((USItype)(bl))) +# define umul_ppmm(xh, xl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" ((USItype)(xh)) \ + : "r" (__m0), \ + "r" (__m1)); \ + } while (0) +# define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" ((USItype)(q)), \ + "=q" ((USItype)(r)) \ + : "1" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))) +# define count_leading_zeros(count, x) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))) +# define COUNT_LEADING_ZEROS_0 32 +#endif /* __a29k__ */ + + +#if defined (__alpha) && W_TYPE_SIZE == 64 +# define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("umulh %r1,%2,%0" \ + : "=r" ((UDItype) ph) \ + : "%rJ" (__m0), \ + "rI" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +# define UMUL_TIME 46 +# ifndef LONGLONG_STANDALONE +# define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UDItype __udiv_qrnnd (); +# define UDIV_TIME 220 +# endif /* !LONGLONG_STANDALONE */ +#endif /* __alpha */ + +/*************************************** + ************** ARM ****************** + ***************************************/ +#if defined (__arm__) && W_TYPE_SIZE == 32 && \ + (!defined (__thumb__) || defined (__thumb2__)) +/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. 
*/ +# ifndef __ARM_ARCH +# ifdef __ARM_ARCH_2__ +# define __ARM_ARCH 2 +# elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__) +# define __ARM_ARCH 3 +# elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__) +# define __ARM_ARCH 4 +# elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH 5 +# elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH 6 +# elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH 7 +# else + /* could not detect? */ +# endif +# endif /* !__ARM_ARCH */ + +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n" \ + "adc %0, %2, %3" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl)) __CLOBBER_CC) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5\n" \ + "sbc %0, %2, %3" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl)) __CLOBBER_CC) +# if (defined __ARM_ARCH && __ARM_ARCH <= 3) +# define umul_ppmm(xh, xl, a, b) \ + __asm__ ("@ Inlined umul_ppmm\n" \ + "mov %|r0, %2, lsr #16 @ AAAA\n" \ + "mov %|r2, %3, lsr #16 @ BBBB\n" \ + "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ + "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ + "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ + "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ + "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ + "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ + "adds %|r0, %1, %0 @ central sum\n" \ + "addcs %|r2, %|r2, #65536\n" \ + "adds %1, %|r1, %|r0, lsl #16\n" \ + "adc %0, %|r2, %|r0, lsr #16" \ + : "=&r" ((xh)), \ + "=r" 
((xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1", "r2" __AND_CLOBBER_CC) +# else /* __ARM_ARCH >= 4 */ +# define umul_ppmm(xh, xl, a, b) \ + __asm__ ("@ Inlined umul_ppmm\n" \ + "umull %1, %0, %2, %3" \ + : "=&r" ((xh)), \ + "=r" ((xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b))) +# endif /* __ARM_ARCH >= 4 */ +# define UMUL_TIME 20 +# define UDIV_TIME 100 +# if (defined __ARM_ARCH && __ARM_ARCH >= 5) +# define count_leading_zeros(count, x) \ + __asm__ ("clz %0, %1" \ + : "=r" ((count)) \ + : "r" ((USItype)(x))) +# endif /* __ARM_ARCH >= 5 */ +#endif /* __arm__ */ + +/*************************************** + ********** ARM64 / Aarch64 ********** + ***************************************/ +#if defined(__aarch64__) && W_TYPE_SIZE == 64 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n" \ + "adc %0, %2, %3\n" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((UDItype)(ah)), \ + "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), \ + "r" ((UDItype)(bl)) __CLOBBER_CC) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5\n" \ + "sbc %0, %2, %3\n" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((UDItype)(ah)), \ + "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), \ + "r" ((UDItype)(bl)) __CLOBBER_CC) +# define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1), __ph; \ + (pl) = __m0 * __m1; \ + __asm__ ("umulh %0,%1,%2" \ + : "=r" (__ph) \ + : "r" (__m0), \ + "r" (__m1)); \ + (ph) = __ph; \ + } while (0) +# define count_leading_zeros(count, x) \ + do { \ + UDItype __co; \ + __asm__ ("clz %0, %1\n" \ + : "=r" (__co) \ + : "r" ((UDItype)(x))); \ + (count) = __co; \ + } while (0) +#endif /* __aarch64__ */ + +/*************************************** + ************** CLIPPER ************** + ***************************************/ +#if defined (__clipper__) && W_TYPE_SIZE == 32 +# define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + 
__asm__ ("mulwux %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +# define smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((SItype)(u)), \ + "r" ((SItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +# define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + + +/*************************************** + ************** GMICRO *************** + ***************************************/ +#if defined (__gmicro__) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1\n" \ + "addx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1\n" \ + "subx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +# define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%0" ((USItype)(m0)), \ + "g" ((USItype)(m1))) +# define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "1" ((USItype)(nh)), \ + "0" ((USItype)(nl)), \ + "g" ((USItype)(d))) +# define count_leading_zeros(count, x) \ + __asm__ ("bsch/1 %1,%0" \ + : "=g" (count) \ + : "g" ((USItype)(x)), \ + "0" ((USItype)0)) +#endif + + +/*************************************** + ************** HPPA ***************** + ***************************************/ +#if defined (__hppa) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ (" add 
%4,%5,%1\n" \ + " addc %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "%rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ (" sub %4,%5,%1\n" \ + " subb %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +# if defined (_PA_RISC1_1) +# define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ (" xmpyu %1,%2,%0" \ + : "=*f" (__xx.__ll) \ + : "*f" ((USItype)(u)), \ + "*f" ((USItype)(v))); \ + (wh) = __xx.__i.__h; \ + (wl) = __xx.__i.__l; \ + } while (0) +# define UMUL_TIME 8 +# define UDIV_TIME 60 +# else +# define UMUL_TIME 40 +# define UDIV_TIME 80 +# endif +# ifndef LONGLONG_STANDALONE +# define udiv_qrnnd(q, r, n1, n0, d) \ + do { USItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern USItype __udiv_qrnnd (); +# endif /* !LONGLONG_STANDALONE */ +# define count_leading_zeros(count, x) \ + do { \ + USItype __tmp; \ + __asm__ ( \ + " ldi 1,%0 \n" \ + " extru,= %1,15,16,%%r0 ; Bits 31..16 zero? \n" \ + " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ + " ldo 16(%0),%0 ; Yes. Perform add. \n" \ + " extru,= %1,23,8,%%r0 ; Bits 15..8 zero? \n" \ + " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ + " ldo 8(%0),%0 ; Yes. Perform add. \n" \ + " extru,= %1,27,4,%%r0 ; Bits 7..4 zero? \n" \ + " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ + " ldo 4(%0),%0 ; Yes. Perform add. \n" \ + " extru,= %1,29,2,%%r0 ; Bits 3..2 zero? \n" \ + " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ + " ldo 2(%0),%0 ; Yes. Perform add. \n" \ + " extru %1,30,1,%1 ; Extract bit 1. \n" \ + " sub %0,%1,%0 ; Subtract it. 
" \ + : "=r" (count), "=r" (__tmp) : "1" (x)); \ + } while (0) +#endif /* hppa */ + + +/*************************************** + ************** I370 ***************** + ***************************************/ +#if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32 +# define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "r" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +# define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (m0), \ + "r" (m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + } while (0) +# define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" (__xx.__ll) \ + : "0" (__xx.__ll), "r" (d)); \ + (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ + } while (0) +#endif + + +/*************************************** + ************** I386 ***************** + ***************************************/ +#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%1\n" \ + "adcl %3,%0" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl)) \ + __CLOBBER_CC) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1\n" \ + "sbbl %3,%0" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl)) \ + __CLOBBER_CC) +# define umul_ppmm(w1, 
w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" ((w0)), \ + "=d" ((w1)) \ + : "%0" ((USItype)(u)), \ + "rm" ((USItype)(v)) \ + __CLOBBER_CC) +# define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" ((q)), \ + "=d" ((r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "rm" ((USItype)(d)) \ + __CLOBBER_CC) +# define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("bsrl %1,%0" \ + : "=r" (__cbtmp) : "rm" ((USItype)(x)) \ + __CLOBBER_CC); \ + (count) = __cbtmp ^ 31; \ + } while (0) +# define count_trailing_zeros(count, x) \ + __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)) __CLOBBER_CC) +# ifndef UMUL_TIME +# define UMUL_TIME 40 +# endif +# ifndef UDIV_TIME +# define UDIV_TIME 40 +# endif +#endif /* 80x86 */ + +/*************************************** + *********** AMD64 / x86-64 ************ + ***************************************/ +#if defined(__x86_64) && W_TYPE_SIZE == 64 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addq %5,%1\n" \ + "adcq %3,%0" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "0" ((UDItype)(ah)), \ + "g" ((UDItype)(bh)), \ + "1" ((UDItype)(al)), \ + "g" ((UDItype)(bl)) \ + __CLOBBER_CC) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subq %5,%1\n" \ + "sbbq %3,%0" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "0" ((UDItype)(ah)), \ + "g" ((UDItype)(bh)), \ + "1" ((UDItype)(al)), \ + "g" ((UDItype)(bl)) \ + __CLOBBER_CC) +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulq %3" \ + : "=a" ((w0)), \ + "=d" ((w1)) \ + : "0" ((UDItype)(u)), \ + "rm" ((UDItype)(v)) \ + __CLOBBER_CC) +# define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divq %4" \ + : "=a" ((q)), \ + "=d" ((r)) \ + : "0" ((UDItype)(n0)), \ + "1" ((UDItype)(n1)), \ + "rm" ((UDItype)(d)) \ + __CLOBBER_CC) +# define count_leading_zeros(count, x) \ + do { \ + UDItype __cbtmp; \ + __asm__ ("bsrq %1,%0" \ + : "=r" (__cbtmp) : "rm" ((UDItype)(x)) \ + __CLOBBER_CC); \ + (count) = __cbtmp ^ 63; \ + } while (0) +# define 
count_trailing_zeros(count, x) \ + do { \ + UDItype __cbtmp; \ + __asm__ ("bsfq %1,%0" \ + : "=r" (__cbtmp) : "rm" ((UDItype)(x)) \ + __CLOBBER_CC); \ + (count) = __cbtmp; \ + } while (0) +# ifndef UMUL_TIME +# define UMUL_TIME 40 +# endif +# ifndef UDIV_TIME +# define UDIV_TIME 40 +# endif +#endif /* __x86_64 */ + + +/*************************************** + ************** I860 ***************** + ***************************************/ +#if defined (__i860__) && W_TYPE_SIZE == 32 +# define rshift_rhlc(r,h,l,c) \ + __asm__ ("shr %3,r0,r0\n" \ + "shrd %1,%2,%0" \ + "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +/*************************************** + ************** I960 ***************** + ***************************************/ +#if defined (__i960__) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\n" \ + "addc %5,%4,%1\n" \ + "addc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "%dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 0,0\n" \ + "subc %5,%4,%1\n" \ + "subc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +# define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__xx.__ll) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +# define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__w) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + __w; }) +# define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) \ + : 
"dI" (__nn.__ll), \ + "dI" ((USItype)(d))); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ + } while (0) +# define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("scanbit %1,%0" \ + : "=r" (__cbtmp) \ + : "r" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +# define COUNT_LEADING_ZEROS_0 (-32) /* sic */ +# if defined (__i960mx) /* what is the proper symbol to test??? */ +# define rshift_rhlc(r,h,l,c) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" \ + : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ + } +# endif /* i960mx */ +#endif /* i960 */ + + +/*************************************** + ************** 68000 **************** + ***************************************/ +#if (defined (__mc68000__) || defined (__mc68020__) \ + || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n" \ + "addx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n" \ + "subx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +# if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "dmi" ((USItype)(v))) +# define UMUL_TIME 45 +# define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +# define UDIV_TIME 90 +# define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l 
%4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +# define count_leading_zeros(count, x) \ + __asm__ ("bfffo %1{%b2:%b2},%0" \ + : "=d" ((USItype)(count)) \ + : "od" ((USItype)(x)), "n" (0)) +# define COUNT_LEADING_ZEROS_0 32 +# else /* not mc68020 */ +# define umul_ppmm(xh, xl, a, b) \ + do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm \n" \ + " move%.l %5,%3 \n" \ + " move%.l %2,%0 \n" \ + " move%.w %3,%1 \n" \ + " swap %3 \n" \ + " swap %0 \n" \ + " mulu %2,%1 \n" \ + " mulu %3,%0 \n" \ + " mulu %2,%3 \n" \ + " swap %2 \n" \ + " mulu %5,%2 \n" \ + " add%.l %3,%2 \n" \ + " jcc 1f \n" \ + " add%.l %#0x10000,%0 \n" \ + "1: move%.l %2,%3 \n" \ + " clr%.w %2 \n" \ + " swap %2 \n" \ + " swap %3 \n" \ + " clr%.w %3 \n" \ + " add%.l %3,%1 \n" \ + " addx%.l %2,%0 \n" \ + " | End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ + } while (0) +# define UMUL_TIME 100 +# define UDIV_TIME 400 +# endif /* not mc68020 */ +#endif /* mc68000 */ + + +/*************************************** + ************** 88000 **************** + ***************************************/ +#if defined (__m88000__) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n" \ + "addu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5\n" \ + "subu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +# define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("ff1 %0,%1" \ + : "=r" (__cbtmp) 
\ + : "r" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +# define COUNT_LEADING_ZEROS_0 63 /* sic */ +# if defined (__m88110__) +# define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ + } while (0) +# define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +# define UMUL_TIME 5 +# define UDIV_TIME 25 +# else +# define UMUL_TIME 17 +# define UDIV_TIME 150 +# endif /* __m88110__ */ +#endif /* __m88000__ */ + +/*************************************** + ************** MIPS ***************** + ***************************************/ +#if defined (__mips__) && W_TYPE_SIZE == 32 +# if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \ + __GNUC_MINOR__ >= 4) +# define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype _r; \ + _r = (UDItype) u * v; \ + (w1) = _r >> 32; \ + (w0) = (USItype) _r; \ + } while (0) +# elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" \ + : "=l" ((USItype)(w0)), \ + "=h" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +# else +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3 \n" \ + "mflo %0 \n" \ + "mfhi %1" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +# endif +# define UMUL_TIME 10 +# define UDIV_TIME 100 +#endif /* __mips__ */ + +/*************************************** + ************** MIPS/64 ************** + ***************************************/ +#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +# if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \ + 
__GNUC_MINOR__ >= 4) +typedef unsigned int UTItype __attribute__ ((mode (TI))); +# define umul_ppmm(w1, w0, u, v) \ + do { \ + UTItype _r; \ + _r = (UTItype) u * v; \ + (w1) = _r >> 64; \ + (w0) = (UDItype) _r; \ + } while (0) +# elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" ((UDItype)(w0)), \ + "=h" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +# else +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3 \n" \ + "mflo %0 \n" \ + "mfhi %1" \ + : "=d" ((UDItype)(w0)), \ + "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +# endif +# define UMUL_TIME 20 +# define UDIV_TIME 140 +#endif /* __mips__ */ + + +/*************************************** + ************** 32000 **************** + ***************************************/ +#if defined (__ns32000__) && W_TYPE_SIZE == 32 +# define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("meid %2,%0" \ + : "=g" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +# define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + __w; }) +# define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__xx.__ll) \ + : "0" (__xx.__ll), \ + "g" ((USItype)(d))); \ + (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) +# define count_trailing_zeros(count,x) \ + do { + __asm__ ("ffsd %2,%0" \ + : "=r" ((USItype) (count)) \ + : "0" ((USItype) 0), \ + "r" ((USItype) (x))); \ + } while (0) +#endif /* __ns32000__ */ + + +/*************************************** + ************** PPC ****************** + ***************************************/ +#if (defined (_ARCH_PPC) || defined (_IBMR2)) && 
W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "%r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + } while (0) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + } while (0) +# define 
count_leading_zeros(count, x) \ + __asm__ ("{cntlz|cntlzw} %0,%1" \ + : "=r" ((count)) \ + : "r" ((USItype)(x))) +# define COUNT_LEADING_ZEROS_0 32 +# if defined (_ARCH_PPC) +# define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" (ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +# define UMUL_TIME 15 +# define smul_ppmm(ph, pl, m0, m1) \ + do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" \ + : "=r" ((SItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +# define SMUL_TIME 14 +# define UDIV_TIME 120 +# else +# define umul_ppmm(xh, xl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((xh)), \ + "=q" ((xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +# define UMUL_TIME 8 +# define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((SItype)(xh)), \ + "=q" ((SItype)(xl)) \ + : "r" (m0), \ + "r" (m1)) +# define SMUL_TIME 4 +# define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" \ + : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ + : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) +# define UDIV_TIME 100 +# endif +#endif /* Power architecture variants. */ + +/* Powerpc 64 bit support taken from gmp-4.1.2. */ +/* We should test _IBMR2 here when we add assembly support for the system + vendor compilers. 
*/ +#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 64 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + DItype __m0 = (m0), 
__m1 = (m1); \ + __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define SMUL_TIME 14 /* ??? */ +#define UDIV_TIME 120 /* ??? */ +#endif /* 64-bit PowerPC. */ + +/*************************************** + ************** PYR ****************** + ***************************************/ +#if defined (__pyr__) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1 \n" \ + "addwc %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1 \n" \ + "subwb %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ +# define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ ("movw %1,%R0 \n" \ + "uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#endif /* __pyr__ */ + + +/*************************************** + ************** RT/ROMP ************** + ***************************************/ +#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5 \n" \ + "ae %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n" \ + "se %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +# define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 
= (m0), __m1 = (m1); \ + __asm__ ( \ + "s r2,r2 \n" \ + "mts r10,%2 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "m r2,%3 \n" \ + "cas %0,r2,r0 \n" \ + "mfs r10,%1" \ + : "=r" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%r" (__m0), \ + "r" (__m1) \ + : "r2"); \ + (ph) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +# define UMUL_TIME 20 +# define UDIV_TIME 200 +# define count_leading_zeros(count, x) \ + do { \ + if ((x) >= 0x10000) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x) >> 16)); \ + else \ + { \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))); \ + (count) += 16; \ + } \ + } while (0) +#endif /* RT/ROMP */ + + +/*************************************** + ************** SH2 ****************** + ***************************************/ +#if (defined (__sh2__) || defined(__sh3__) || defined(__SH4__) ) \ + && W_TYPE_SIZE == 32 +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3\n" \ + "sts macl,%1\n" \ + "sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +# define UMUL_TIME 5 +#endif + +/*************************************** + ************** SPARC **************** + ***************************************/ +#if defined (__sparc__) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1\n" \ + "addx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n" \ + "subx %r2,%3,%0" \ + : "=r" 
((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +# if defined (__sparc_v8__) || defined(__sparcv8) +/* Don't match immediate range because, 1) it is not often useful, + 2) the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. */ +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +# define UMUL_TIME 5 +# ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ +# define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) +# define UDIV_TIME 25 +# endif /*!SUPERSPARC */ +# else /* ! __sparc_v8__ */ +# if defined (__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +# define UMUL_TIME 5 +# define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd \n" \ + " wr %%g0,%2,%%y ! 
Not a delayed write for sparclite \n" \ + " tst %%g0 \n" \ + " divscc %3,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%%g1 \n" \ + " divscc %%g1,%4,%0 \n" \ + " rd %%y,%1 \n" \ + " bl,a 1f \n" \ + " add %1,%4,%1 \n" \ + "1: ! End of inline udiv_qrnnd" \ + : "=r" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "rI" ((USItype)(d)) \ + : "%g1" __AND_CLOBBER_CC) +# define UDIV_TIME 37 +# define count_leading_zeros(count, x) \ + __asm__ ("scan %1,0,%0" \ + : "=r" ((USItype)(x)) \ + : "r" ((USItype)(count))) +/* Early sparclites return 63 for an argument of 0, but they warn that future + implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 + undefined. */ +# endif /* !__sparclite__ */ +# endif /* !__sparc_v8__ */ +/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +# ifndef umul_ppmm +# define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm \n" \ + " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr \n" \ + " sra %3,31,%%g2 ! Don't move this insn \n" \ + " and %2,%%g2,%%g2 ! Don't move this insn \n" \ + " andcc %%g0,0,%%g1 ! 
Don't move this insn \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,%3,%%g1 \n" \ + " mulscc %%g1,0,%%g1 \n" \ + " add %%g1,%%g2,%0 \n" \ + " rd %%y,%1" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "%rI" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +# define UMUL_TIME 39 /* 39 instructions */ +# endif /* umul_ppmm */ +# ifndef udiv_qrnnd +# ifndef LONGLONG_STANDALONE +# define udiv_qrnnd(q, r, n1, n0, d) \ + do { USItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern USItype __udiv_qrnnd (); +# define UDIV_TIME 140 +# endif /* LONGLONG_STANDALONE */ +# endif /* udiv_qrnnd */ +#endif /* __sparc__ */ + + +/*************************************** + ************** VAX ****************** + ***************************************/ +#if defined (__vax__) && W_TYPE_SIZE == 32 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n" \ + "adwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ 
+ "g" ((USItype)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n" \ + "sbwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +# define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__xx.__ll) \ + : "g" (__m0), \ + "g" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +# define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) \ + : "g" (__xx.__ll), "g" (d)); \ + } while (0) +#endif /* __vax__ */ + + +/*************************************** + ************** Z8000 **************** + ***************************************/ +#if defined (__z8000__) && W_TYPE_SIZE == 16 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +# define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {long int __ll; \ + struct {unsigned int __h, __l;} __i; \ + } __xx; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "rQR" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = 
__xx.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ + } while (0) +#endif /* __z8000__ */ + + +/*************************************** + *********** s390x/zSeries ************ + ***************************************/ +#if defined (__s390x__) && W_TYPE_SIZE == 64 && __GNUC__ >= 4 +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("algr %1,%5\n" \ + "alcgr %0,%3\n" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "0" ((UDItype)(ah)), \ + "r" ((UDItype)(bh)), \ + "1" ((UDItype)(al)), \ + "r" ((UDItype)(bl)) \ + __CLOBBER_CC) +# define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("slgr %1,%5\n" \ + "slbgr %0,%3\n" \ + : "=r" ((sh)), \ + "=&r" ((sl)) \ + : "0" ((UDItype)(ah)), \ + "r" ((UDItype)(bh)), \ + "1" ((UDItype)(al)), \ + "r" ((UDItype)(bl)) \ + __CLOBBER_CC) +typedef unsigned int UTItype __attribute__ ((mode (TI))); +# define umul_ppmm(w1, w0, u, v) \ + do { \ + UTItype ___r; \ + __asm__ ("mlgr %0,%2" \ + : "=r" (___r) \ + : "0" ((UDItype)(u)), \ + "r" ((UDItype)(v))); \ + (w1) = ___r >> 64; \ + (w0) = (UDItype) ___r; \ + } while (0) +# define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + UTItype ___r = ((UTItype)n1 << 64) | n0; \ + __asm__ ("dlgr %0,%2" \ + : "=r" (___r) \ + : "0" (___r), \ + "r" ((UDItype)(d))); \ + (r) = ___r >> 64; \ + (q) = (UDItype) ___r; \ + } while (0) +#endif /* __s390x__ */ + + +/*************************************** + ***** End CPU Specific Versions ***** + ***************************************/ + +#endif /* __GNUC__ */ +#endif /* !__riscos__ */ + + +/*************************************** + *********** Generic Versions ******** + ***************************************/ +#if !defined (umul_ppmm) && defined (__umulsidi3) +# define umul_ppmm(ph, pl, m0, m1) \ + { \ + UDWtype __ll = __umulsidi3 (m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ + } +#endif + +#if !defined (__umulsidi3) +# define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + 
/* If this machine has no inline assembler, use C macros.  */

/* add_ssaaaa (sh, sl, ah, al, bh, bl): (sh:sl) = (ah:al) + (bh:bl).
   The carry out of the low word is detected as "sum < al".  */
#if !defined (add_ssaaaa)
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) + (bl); \
    (sh) = (ah) + (bh) + (__x < (al)); \
    (sl) = __x; \
  } while (0)
#endif

/* sub_ddmmss (sh, sl, ah, al, bh, bl): (sh:sl) = (ah:al) - (bh:bl).
   The borrow out of the low word is detected as "difference > al".  */
#if !defined (sub_ddmmss)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    (sh) = (ah) - (bh) - (__x > (al)); \
    (sl) = __x; \
  } while (0)
#endif

/* umul_ppmm (w1, w0, u, v): (w1:w0) = u * v, computed from the four
   half-word partial products.  */
#if !defined (umul_ppmm)
# define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    UWtype __u = (u), __v = (v); \
    \
    __ul = __ll_lowpart (__u); \
    __uh = __ll_highpart (__u); \
    __vl = __ll_lowpart (__v); \
    __vh = __ll_highpart (__v); \
    \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
    \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2;               /* but this indeed can */ \
    if (__x1 < __x2)            /* did we get it? */ \
      __x3 += __ll_B;           /* yes, add it in the proper pos. */ \
    \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\
  } while (0)
#endif

/* smul_ppmm (w1, w0, u, v): signed double-word product, derived from
   the unsigned product by subtracting V from the high word when U has
   its top bit set, and U when V has its top bit set.
   Fixed: the guard tested umul_ppmm, which is always defined at this
   point, so this fallback could never be selected.  */
#if !defined (smul_ppmm)
# define smul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __m0 = (u), __m1 = (v); \
    umul_ppmm (__w1, w0, __m0, __m1); \
    (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
                - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
  } while (0)
#endif
/* Define this unconditionally, so it can be used for debugging.  */
/* __udiv_qrnnd_c (q, r, n1, n0, d): divide the two-word value (n1:n0)
   by D one half-word quotient digit at a time; the quotient is stored
   in Q and the remainder in R.  When this version is selected as
   udiv_qrnnd below, UDIV_NEEDS_NORMALIZATION is set to 1.
   NOTE(review): presumably callers must also ensure n1 < d -- confirm.
   Fixed: N1, N0 and D are copied into locals, so each argument is
   evaluated exactly once (D used to be expanded up to eight times).  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __udiv_n1 = (n1), __udiv_n0 = (n0), __udiv_d = (d); \
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
    __d1 = __ll_highpart (__udiv_d); \
    __d0 = __ll_lowpart (__udiv_d); \
    \
    __r1 = __udiv_n1 % __d1; \
    __q1 = __udiv_n1 / __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (__udiv_n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += __udiv_d; \
        if (__r1 >= __udiv_d) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m) \
            __q1--, __r1 += __udiv_d; \
      } \
    __r1 -= __m; \
    \
    __r0 = __r1 % __d1; \
    __q0 = __r1 / __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (__udiv_n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += __udiv_d; \
        if (__r0 >= __udiv_d) \
          if (__r0 < __m) \
            __q0--, __r0 += __udiv_d; \
      } \
    __r0 -= __m; \
    \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
# define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    UWtype __r; \
    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#if !defined (udiv_qrnnd)
# define UDIV_NEEDS_NORMALIZATION 1
# define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Prefer the compiler builtin when its operand width equals the limb
   width.  COUNT is parenthesized so any lvalue expression may be
   passed.  */
#if !defined (count_leading_zeros)
# if defined (HAVE_BUILTIN_CLZL) && SIZEOF_UNSIGNED_LONG * 8 == W_TYPE_SIZE
#  define count_leading_zeros(count, x) ((count) = __builtin_clzl(x))
#  undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
# elif defined (HAVE_BUILTIN_CLZ) && SIZEOF_UNSIGNED_INT * 8 == W_TYPE_SIZE
#  define count_leading_zeros(count, x) ((count) = __builtin_clz(x))
#  undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
# endif
#endif
*/ +# endif +#endif + +#if !defined (count_trailing_zeros) +# if defined (HAVE_BUILTIN_CTZL) && SIZEOF_UNSIGNED_LONG * 8 == W_TYPE_SIZE +# define count_trailing_zeros(count, x) (count = __builtin_ctzl(x)) +# undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */ +# elif defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT * 8 == W_TYPE_SIZE +# define count_trailing_zeros(count, x) (count = __builtin_ctz(x)) +# undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */ +# endif +#endif + +#if !defined (count_leading_zeros) +extern +# ifdef __STDC__ +const +# endif +unsigned char _gcry_clz_tab[]; +# define MPI_INTERNAL_NEED_CLZ_TAB 1 +# define count_leading_zeros(count, x) \ + do { \ + UWtype __xr = (x); \ + UWtype __a; \ + \ + if (W_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((UWtype) 1 << 2*__BITS4) \ + ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ + : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\ + } \ + else \ + { \ + for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = W_TYPE_SIZE - (_gcry_clz_tab[__xr >> __a] + __a); \ + } while (0) +/* This version gives a well-defined value for zero. */ +# define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE +#endif /* !count_leading_zeros */ + +#if !defined (count_trailing_zeros) +/* Define count_trailing_zeros using count_leading_zeros. The latter might be + defined in asm, but if it is not, the C version above is good enough. 
*/ +# define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + UWtype __ctz_c; \ + count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ + (count) = W_TYPE_SIZE - 1 - __ctz_c; \ + } while (0) +#endif /* !count_trailing_zeros */ + +#ifndef UDIV_NEEDS_NORMALIZATION +# define UDIV_NEEDS_NORMALIZATION 0 +#endif + +/*************************************** + ****** longlong.h ends here ********* + ***************************************/ diff --git a/comm/third_party/libgcrypt/mpi/m68k/distfiles b/comm/third_party/libgcrypt/mpi/m68k/distfiles new file mode 100644 index 0000000000..4c0967b840 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/distfiles @@ -0,0 +1,8 @@ +syntax.h +mpih-lshift.S +mpih-rshift.S +mpih-add1.S +mpih-sub1.S + + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mc68020/distfiles b/comm/third_party/libgcrypt/mpi/m68k/mc68020/distfiles new file mode 100644 index 0000000000..fc7df9fa35 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mc68020/distfiles @@ -0,0 +1,3 @@ +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S diff --git a/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul1.S new file mode 100644 index 0000000000..007c94c6db --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul1.S @@ -0,0 +1,104 @@ +/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) + +C_SYMBOL_NAME(_gcry_mpih_mul_1:) +PROLOG(_gcry_mpih_mul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d4),MEM_PREDEC(sp) +#if 0 + movel R(d2),MEM_PREDEC(sp) + movel R(d3),MEM_PREDEC(sp) + movel R(d4),MEM_PREDEC(sp) +#endif + +/* Copy the arguments to registers. Better use movem? 
*/ + movel MEM_DISP(sp,16),R(res_ptr) + movel MEM_DISP(sp,20),R(s1_ptr) + movel MEM_DISP(sp,24),R(s1_size) + movel MEM_DISP(sp,28),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + movel R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + clrl R(d3) + addxl R(d3),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d4) +#if 0 + movel MEM_POSTINC(sp),R(d4) + movel MEM_POSTINC(sp),R(d3) + movel MEM_POSTINC(sp),R(d2) +#endif + rts +EPILOG(_gcry_mpih_mul_1) + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul2.S new file mode 100644 index 0000000000..44baa8d887 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul2.S @@ -0,0 +1,94 @@ +/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) + +C_SYMBOL_NAME(_gcry_mpih_addmul_1:) +PROLOG(_gcry_mpih_addmul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d5),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + addl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + addl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. 
*/ + moveml MEM_POSTINC(sp),R(d2)-R(d5) + + rts +EPILOG(_gcry_mpih_addmul_1) + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul3.S new file mode 100644 index 0000000000..e958ef6117 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mc68020/mpih-mul3.S @@ -0,0 +1,97 @@ +/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + + + +#include "sysdep.h" +#include "asm-syntax.h" + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) + +C_SYMBOL_NAME(_gcry_mpih_submul_1:) +PROLOG(_gcry_mpih_submul_1) + +#define res_ptr a0 +#define s1_ptr a1 +#define s1_size d2 +#define s2_limb d4 + +/* Save used registers on the stack. */ + moveml R(d2)-R(d5),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem? */ + movel MEM_DISP(sp,20),R(res_ptr) + movel MEM_DISP(sp,24),R(s1_ptr) + movel MEM_DISP(sp,28),R(s1_size) + movel MEM_DISP(sp,32),R(s2_limb) + + eorw #1,R(s1_size) + clrl R(d1) + clrl R(d5) + lsrl #1,R(s1_size) + bcc L(L1) + subql #1,R(s1_size) + subl R(d0),R(d0) /* (d0,cy) <= (0,0) */ + +L(Loop:) + movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d1):R(d3) + addxl R(d0),R(d3) + addxl R(d5),R(d1) + subl R(d3),MEM_POSTINC(res_ptr) +L(L1:) movel MEM_POSTINC(s1_ptr),R(d3) + mulul R(s2_limb),R(d0):R(d3) + addxl R(d1),R(d3) + addxl R(d5),R(d0) + subl R(d3),MEM_POSTINC(res_ptr) + + dbf R(s1_size),L(Loop) + addxl R(d5),R(d0) + subl #0x10000,R(s1_size) + bcc L(Loop) + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d5) + + rts +EPILOG(_gcry_mpih_submul_1) + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mpih-add1.S b/comm/third_party/libgcrypt/mpi/m68k/mpih-add1.S new file mode 100644 index 0000000000..8182d21a32 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mpih-add1.S @@ -0,0 +1,92 @@ +/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994,1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_add_n) + +C_SYMBOL_NAME(_gcry_mpih_add_n:) +PROLOG(_gcry_mpih_add_n) + /* Save used registers on the stack. */ + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) + + /* Copy the arguments to registers. Better use movem?
*/ + movel MEM_DISP(sp,12),R(a2) + movel MEM_DISP(sp,16),R(a0) + movel MEM_DISP(sp,20),R(a1) + movel MEM_DISP(sp,24),R(d2) + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + addxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) + + /* Restore used registers from stack frame. */ + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) + + rts +EPILOG(_gcry_mpih_add_n) + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/m68k/mpih-lshift.S new file mode 100644 index 0000000000..133d1aae3d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mpih-lshift.S @@ -0,0 +1,164 @@ +/* mc68020 lshift -- Shift left a low-level natural-number integer. + * + * Copyright (C) 1996, 1998, 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_lshift) + +C_SYMBOL_NAME(_gcry_mpih_lshift:) +PROLOG(_gcry_mpih_lshift) + + /* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + + /* Copy the arguments to registers. 
*/ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Lnormal) + cmpl R(s_ptr),R(res_ptr) + bls L(Lspecial) /* jump if s_ptr >= res_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(s_ptr,d0,l),R(a2) +#endif + cmpl R(res_ptr),R(a2) + bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */ + +L(Lnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) + +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) /* d0 = s_size * 4 = byte length of the limb vector */ + addl R(d0),R(s_ptr) /* advance by the byte length, matching the scaled lea above */ + addl R(d0),R(res_ptr) +#endif + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d0) + lsrl R(d5),R(d0) /* compute carry limb */ + + lsll R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Lend) + lsrl #1,R(s_size) + bcs L(L1) + subql #1,R(s_size) + +L(Loop:) + movel MEM_PREDEC(s_ptr),R(d2) + movel R(d2),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d2) +L(L1:) + movel MEM_PREDEC(s_ptr),R(d1) + movel R(d1),R(d3) + lsrl R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + lsll R(cnt),R(d1) + + dbf R(s_size),L(Loop) + subl #0x10000,R(s_size) + bcc L(Loop) + +L(Lend:) + movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from least significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination.
*/ + +L(Lspecial:) + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LL1) + subql #1,R(s_size) + +L(LLoop:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) +L(LL1:) + movel MEM_POSTINC(s_ptr),R(d2) + addxl R(d2),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + + dbf R(s_size),L(LLoop) + addxl R(d0),R(d0) /* save cy in lsb */ + subl #0x10000,R(s_size) + bcs L(LLend) + lsrl #1,R(d0) /* restore cy */ + bra L(LLoop) + +L(LLend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(_gcry_mpih_lshift) + + + + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/m68k/mpih-rshift.S new file mode 100644 index 0000000000..be9f43502f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mpih-rshift.S @@ -0,0 +1,162 @@ +/* mc68020 rshift -- Shift right a low-level natural-number integer. + * + * Copyright (C) 1996, 1998, 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +#define res_ptr a1 +#define s_ptr a0 +#define s_size d6 +#define cnt d4 + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_rshift) + +C_SYMBOL_NAME(_gcry_mpih_rshift:) +PROLOG(_gcry_mpih_rshift) + /* Save used registers on the stack. */ + moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp) + + /* Copy the arguments to registers. */ + movel MEM_DISP(sp,28),R(res_ptr) + movel MEM_DISP(sp,32),R(s_ptr) + movel MEM_DISP(sp,36),R(s_size) + movel MEM_DISP(sp,40),R(cnt) + + moveql #1,R(d5) + cmpl R(d5),R(cnt) + bne L(Rnormal) + cmpl R(res_ptr),R(s_ptr) + bls L(Rspecial) /* jump if res_ptr >= s_ptr */ +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(res_ptr,s_size,l,4),R(a2) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) + lea MEM_INDX(res_ptr,d0,l),R(a2) +#endif + cmpl R(s_ptr),R(a2) + bls L(Rspecial) /* jump if s_ptr >= res_ptr + s_size */ + +L(Rnormal:) + moveql #32,R(d5) + subl R(cnt),R(d5) + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d0) + lsll R(d5),R(d0) /* compute carry limb */ + + lsrl R(cnt),R(d2) + movel R(d2),R(d1) + subql #1,R(s_size) + beq L(Rend) + lsrl #1,R(s_size) + bcs L(R1) + subql #1,R(s_size) + +L(Roop:) + movel MEM_POSTINC(s_ptr),R(d2) + movel R(d2),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d1) + movel R(d1),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d2) +L(R1:) + movel MEM_POSTINC(s_ptr),R(d1) + movel R(d1),R(d3) + lsll R(d5),R(d3) + orl R(d3),R(d2) + movel R(d2),MEM_POSTINC(res_ptr) + lsrl R(cnt),R(d1) + + dbf R(s_size),L(Roop) + subl 
#0x10000,R(s_size) + bcc L(Roop) + +L(Rend:) + movel R(d1),MEM(res_ptr) /* store most significant limb */ + +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts + +/* We loop from most significant end of the arrays, which is only + permissible if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. */ + +L(Rspecial:) +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) + lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr) + lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr) +#else /* not mc68020 */ + movel R(s_size),R(d0) + asll #2,R(d0) /* d0 = s_size * 4 = byte length of the limb vector */ + addl R(d0),R(s_ptr) /* advance by the byte length, matching the scaled lea above */ + addl R(d0),R(res_ptr) +#endif + + clrl R(d0) /* initialize carry */ + eorw #1,R(s_size) + lsrl #1,R(s_size) + bcc L(LR1) + subql #1,R(s_size) + +L(LRoop:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) +L(LR1:) + movel MEM_PREDEC(s_ptr),R(d2) + roxrl #1,R(d2) + movel R(d2),MEM_PREDEC(res_ptr) + + dbf R(s_size),L(LRoop) + roxrl #1,R(d0) /* save cy in msb */ + subl #0x10000,R(s_size) + bcs L(LRend) + addl R(d0),R(d0) /* restore cy */ + bra L(LRoop) + +L(LRend:) +/* Restore used registers from stack frame. */ + moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2) + rts +EPILOG(_gcry_mpih_rshift) + + + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/m68k/mpih-sub1.S new file mode 100644 index 0000000000..ee7555f897 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/mpih-sub1.S @@ -0,0 +1,91 @@ +/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and + * store difference in a third limb vector. + * + * Copyright (C) 1992, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt.
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + + + TEXT + ALIGN + GLOBL C_SYMBOL_NAME(_gcry_mpih_sub_n) + +C_SYMBOL_NAME(_gcry_mpih_sub_n:) +PROLOG(_gcry_mpih_sub_n) +/* Save used registers on the stack. */ + movel R(d2),MEM_PREDEC(sp) + movel R(a2),MEM_PREDEC(sp) + +/* Copy the arguments to registers. Better use movem?
*/ + movel MEM_DISP(sp,12),R(a2) + movel MEM_DISP(sp,16),R(a0) + movel MEM_DISP(sp,20),R(a1) + movel MEM_DISP(sp,24),R(d2) + + eorw #1,R(d2) + lsrl #1,R(d2) + bcc L(L1) + subql #1,R(d2) /* clears cy as side effect */ + +L(Loop:) + movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) +L(L1:) movel MEM_POSTINC(a0),R(d0) + movel MEM_POSTINC(a1),R(d1) + subxl R(d1),R(d0) + movel R(d0),MEM_POSTINC(a2) + + dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */ + subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */ + subl #0x10000,R(d2) + bcs L(L2) + addl R(d0),R(d0) /* restore cy */ + bra L(Loop) + +L(L2:) + negl R(d0) + +/* Restore used registers from stack frame. */ + movel MEM_POSTINC(sp),R(a2) + movel MEM_POSTINC(sp),R(d2) + + rts +EPILOG(_gcry_mpih_sub_n) + + diff --git a/comm/third_party/libgcrypt/mpi/m68k/syntax.h b/comm/third_party/libgcrypt/mpi/m68k/syntax.h new file mode 100644 index 0000000000..e27de98b4f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/m68k/syntax.h @@ -0,0 +1,185 @@ +/* asm.h -- Definitions for 68k syntax variations. + * + * Copyright (C) 1992, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#undef ALIGN + +#ifdef MIT_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)base@ +#define MEM_DISP(base,displacement)base@(displacement) +#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale) +#define MEM_PREDEC(memory_base)memory_base@- +#define MEM_POSTINC(memory_base)memory_base@+ +#define L(label) label +#define TEXT .text +#define ALIGN .even +#define GLOBL .globl +#define moveql moveq +/* Use variable sized opcodes. 
*/ +#define bcc jcc +#define bcs jcs +#define bls jls +#define beq jeq +#define bne jne +#define bra jra +#endif + +#ifdef SONY_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)(displacement,base) +#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale) +#define MEM_PREDEC(memory_base)-(memory_base) +#define MEM_POSTINC(memory_base)(memory_base)+ +#define L(label) label +#define TEXT .text +#define ALIGN .even +#define GLOBL .globl +#endif + +#ifdef MOTOROLA_SYNTAX +#define PROLOG(name) +#define EPILOG(name) +#define R(r)r +#define MEM(base)(base) +#define MEM_DISP(base,displacement)(displacement,base) +#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale) +#define MEM_PREDEC(memory_base)-(memory_base) +#define MEM_POSTINC(memory_base)(memory_base)+ +#define L(label) label +#define TEXT +#define ALIGN +#define GLOBL XDEF +#define lea LEA +#define movel MOVE.L +#define moveml MOVEM.L +#define moveql MOVEQ.L +#define cmpl CMP.L +#define orl OR.L +#define clrl CLR.L +#define eorw EOR.W +#define lsrl LSR.L +#define lsll LSL.L +#define roxrl ROXR.L +#define roxll ROXL.L +#define addl ADD.L +#define addxl ADDX.L +#define addql ADDQ.L +#define subl SUB.L +#define subxl SUBX.L +#define subql SUBQ.L +#define negl NEG.L +#define mulul MULU.L +#define bcc BCC +#define bcs BCS +#define bls BLS +#define beq BEQ +#define bne BNE +#define bra BRA +#define dbf DBF +#define rts RTS +#define d0 D0 +#define d1 D1 +#define d2 D2 +#define d3 D3 +#define d4 D4 +#define d5 D5 +#define d6 D6 +#define d7 D7 +#define a0 A0 +#define a1 A1 +#define a2 A2 +#define a3 A3 +#define a4 A4 +#define a5 A5 +#define a6 A6 +#define a7 A7 +#define sp SP +#endif + +#ifdef ELF_SYNTAX +#define PROLOG(name) .type name,@function +#define EPILOG(name) .size name,.-name 
+#define MEM(base)(R(base)) +#define MEM_DISP(base,displacement)(displacement,R(base)) +#define MEM_PREDEC(memory_base)-(R(memory_base)) +#define MEM_POSTINC(memory_base)(R(memory_base))+ +#ifdef __STDC__ +#define R_(r)%##r +#define R(r)R_(r) +#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix)) +#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix) +#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale)) +#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale) +#define L(label) .##label +#else +#define R(r)%/**/r +#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix) +#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale) +#define L(label) ./**/label +#endif +#define TEXT .text +#define ALIGN .align 2 +#define GLOBL .globl +#define bcc jbcc +#define bcs jbcs +#define bls jbls +#define beq jbeq +#define bne jbne +#define bra jbra +#endif + +#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX) +#define movel move.l +#define moveml movem.l +#define moveql moveq.l +#define cmpl cmp.l +#define orl or.l +#define clrl clr.l +#define eorw eor.w +#define lsrl lsr.l +#define lsll lsl.l +#define roxrl roxr.l +#define roxll roxl.l +#define addl add.l +#define addxl addx.l +#define addql addq.l +#define subl sub.l +#define subxl subx.l +#define subql subq.l +#define negl neg.l +#define mulul mulu.l +#endif diff --git a/comm/third_party/libgcrypt/mpi/mips3/README b/comm/third_party/libgcrypt/mpi/mips3/README new file mode 100644 index 0000000000..4ba4546d9f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/README @@ -0,0 +1,23 @@ +This directory contains mpn functions optimized for MIPS3. Example of +processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000. + +RELEVANT OPTIMIZATION ISSUES + +1. On the R4000 and R4400, branches, both the plain and the "likely" ones, + take 3 cycles to execute. 
(The fastest possible loop will take 4 cycles, + because of the delay insn.) + + On the R4600, branches take a single cycle. + + On the R8000, branches often take no noticeable cycles, as they are + executed in a separate function unit. + +2. The R4000 and R4400 have a load latency of 4 cycles. + +3. On the R4000 and R4400, multiplies take a data-dependent number of + cycles, contrary to the SGI documentation. There seem to be 3 or 4 + possible latencies. + +STATUS + +Good... diff --git a/comm/third_party/libgcrypt/mpi/mips3/distfiles b/comm/third_party/libgcrypt/mpi/mips3/distfiles new file mode 100644 index 0000000000..85260fc8e7 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/distfiles @@ -0,0 +1,10 @@ +README +mpih-add1.S +mpih-sub1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-lshift.S +mpih-rshift.S +mpi-asm-defs.h + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpi-asm-defs.h b/comm/third_party/libgcrypt/mpi/mips3/mpi-asm-defs.h new file mode 100644 index 0000000000..2d9a9c1f2c --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpi-asm-defs.h @@ -0,0 +1,10 @@ +/* This file defines some basic constants for the MPI machinery. We + * need to define the types on a per-CPU basis, so it is done with + * this file here. */ +#define BYTES_PER_MPI_LIMB 8 + + + + + + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-add1.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-add1.S new file mode 100644 index 0000000000..f3db029de4 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpih-add1.S @@ -0,0 +1,124 @@ +/* mips3 add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1995, 1998, 2000 + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, ($4) + * mpi_ptr_t s1_ptr, ($5) + * mpi_ptr_t s2_ptr, ($6) + * mpi_size_t size) ($7) + */ + + .text + .align 2 + .globl _gcry_mpih_add_n + .ent _gcry_mpih_add_n +_gcry_mpih_add_n: + .set noreorder + .set nomacro + + ld $10,0($5) + ld $11,0($6) + + daddiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + dsubu $7,$7,$9 + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 + +.L0: beq $7,$0,.Lend + nop + +.Loop: daddiu $7,$7,-4 + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu $13,$13,$2 
+ ld $11,32($6) + sltu $8,$13,$2 + daddu $13,$12,$13 + sltu $2,$13,$12 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 + +.Lend: daddu $11,$11,$2 + sltu $8,$11,$2 + daddu $11,$10,$11 + sltu $2,$11,$10 + sd $11,0($4) + j $31 + or $2,$2,$8 + + .end _gcry_mpih_add_n + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-lshift.S new file mode 100644 index 0000000000..084c109b24 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpih-lshift.S @@ -0,0 +1,97 @@ +/* mips3 lshift + * + * Copyright (C) 1995, 1998, 2000, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, ($4) + * mpi_ptr_t up, ($5) + * mpi_size_t usize, ($6) + * unsigned cnt) ($7) + */ + + .text + .align 2 + .globl _gcry_mpih_lshift + .ent _gcry_mpih_lshift +_gcry_mpih_lshift: + .set noreorder + .set nomacro + + dsll $2,$6,3 + daddu $5,$5,$2 # make r5 point at end of src + ld $10,-8($5) # load first limb + dsubu $13,$0,$7 + daddu $4,$4,$2 # make r4 point at end of res + daddiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + dsrl $2,$10,$13 # compute function result + + dsubu $6,$6,$9 + +.Loop0: ld $3,-16($5) + daddiu $4,$4,-8 + daddiu $5,$5,-8 + daddiu $9,$9,-1 + dsll $11,$10,$7 + dsrl $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sd $8,0($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: ld $3,-16($5) + daddiu $4,$4,-32 + daddiu $6,$6,-4 + dsll $11,$10,$7 + dsrl $12,$3,$13 + + ld $10,-24($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,24($4) + dsrl $9,$10,$13 + + ld $3,-32($5) + dsll $11,$10,$7 + or $8,$14,$9 + sd $8,16($4) + dsrl $12,$3,$13 + + ld $10,-40($5) + dsll $14,$3,$7 + or $8,$11,$12 + sd $8,8($4) + dsrl $9,$10,$13 + + daddiu $5,$5,-32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,0($4) + +.Lend: dsll $8,$10,$7 + j $31 + sd $8,-8($4) + .end _gcry_mpih_lshift diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-mul1.S new file mode 100644 index 0000000000..6c0099de3f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpih-mul1.S @@ -0,0 +1,89 @@ +/* mips3 mpih-mul1.S -- Multiply a limb vector with a limb and store + * the result in a second limb vector. 
+ * + * Copyright (C) 1992, 1994, 1995, 1998, 2000 + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (r4) + * mpi_ptr_t s1_ptr, (r5) + * mpi_size_t s1_size, (r6) + * mpi_limb_t s2_limb) (r7) + */ + + .text + .align 4 + .globl _gcry_mpih_mul_1 + .ent _gcry_mpih_mul_1 +_gcry_mpih_mul_1: + .set noreorder + .set nomacro + +/* # warm up phase 0 */ + ld $8,0($5) + +/* # warm up phase 1 */ + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: mflo $10 + mfhi $9 + daddiu $5,$5,8 + daddu $10,$10,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$10,$2 # carry from previous addition -> $2 + sd $10,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + +/* # cool down phase 1 */ +$LC1: mflo $10 + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + dmultu $8,$7 + sd $10,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product 
limb and carry from addition + +/* # cool down phase 0 */ +$LC0: mflo $10 + mfhi $9 + daddu $10,$10,$2 + sltu $2,$10,$2 + sd $10,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end _gcry_mpih_mul_1 + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-mul2.S new file mode 100644 index 0000000000..ca8276388f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpih-mul2.S @@ -0,0 +1,101 @@ +/* MIPS3 addmul_1 -- Multiply a limb vector with a single limb and + * add the product to a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, 2000 + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (r4) + * mpi_ptr_t s1_ptr, (r5) + * mpi_size_t s1_size, (r6) + * mpi_limb_t s2_limb) (r7) + */ + + .text + .align 4 + .globl _gcry_mpih_addmul_1 + .ent _gcry_mpih_addmul_1 +_gcry_mpih_addmul_1: + .set noreorder + .set nomacro + +/* # warm up phase 0 */ + ld $8,0($5) + +/* # warm up phase 1 */ + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) + mflo $3 + mfhi $9 + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + +/* # cool down phase 1 */ +$LC1: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + +/* # cool down phase 0 */ +$LC0: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + daddu $3,$10,$3 + sltu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end _gcry_mpih_addmul_1 + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-mul3.S new file mode 100644 index 0000000000..be421a68ee --- /dev/null +++ 
b/comm/third_party/libgcrypt/mpi/mips3/mpih-mul3.S @@ -0,0 +1,101 @@ +/* MIPS3 submul_1 -- Multiply a limb vector with a single limb and + * subtract the product from a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, 2000 + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (r4) + * mpi_ptr_t s1_ptr, (r5) + * mpi_size_t s1_size, (r6) + * mpi_limb_t s2_limb) (r7) + */ + + .text + .align 4 + .globl _gcry_mpih_submul_1 + .ent _gcry_mpih_submul_1 +_gcry_mpih_submul_1: + .set noreorder + .set nomacro + +/* # warm up phase 0 */ + ld $8,0($5) + +/* # warm up phase 1 */ + daddiu $5,$5,8 + dmultu $8,$7 + + daddiu $6,$6,-1 + beq $6,$0,$LC0 + move $2,$0 # zero cy2 + + daddiu $6,$6,-1 + beq $6,$0,$LC1 + ld $8,0($5) # load new s1 limb as early as possible + +Loop: ld $10,0($4) + mflo $3 + mfhi $9 + daddiu $5,$5,8 + daddu $3,$3,$2 # add old carry limb to low product limb + dmultu $8,$7 + ld $8,0($5) # load new s1 limb as early as possible + daddiu $6,$6,-1 # decrement loop counter + sltu $2,$3,$2 # carry from previous addition -> $2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu 
$4,$4,8 + bne $6,$0,Loop + daddu $2,$9,$2 # add high product limb and carry from addition + +/* # cool down phase 1 */ +$LC1: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dmultu $8,$7 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + daddiu $4,$4,8 + daddu $2,$9,$2 # add high product limb and carry from addition + +/* # cool down phase 0 */ +$LC0: ld $10,0($4) + mflo $3 + mfhi $9 + daddu $3,$3,$2 + sltu $2,$3,$2 + dsubu $3,$10,$3 + sgtu $10,$3,$10 + daddu $2,$2,$10 + sd $3,0($4) + j $31 + daddu $2,$9,$2 # add high product limb and carry from addition + + .end _gcry_mpih_submul_1 + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-rshift.S new file mode 100644 index 0000000000..e7e035a034 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpih-rshift.S @@ -0,0 +1,95 @@ +/* mips3 rshift + * + * Copyright (C) 1995, 1998, 2000 + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, ($4) + * mpi_ptr_t up, ($5) + * mpi_size_t usize, ($6) + * unsigned cnt) ($7) + */ + + .text + .align 2 + .globl _gcry_mpih_rshift + .ent _gcry_mpih_rshift +_gcry_mpih_rshift: + .set noreorder + .set nomacro + + ld $10,0($5) # load first limb + dsubu $13,$0,$7 + daddiu $6,$6,-1 + and $9,$6,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + dsll $2,$10,$13 # compute function result + + dsubu $6,$6,$9 + +.Loop0: ld $3,8($5) + daddiu $4,$4,8 + daddiu $5,$5,8 + daddiu $9,$9,-1 + dsrl $11,$10,$7 + dsll $12,$3,$13 + move $10,$3 + or $8,$11,$12 + bne $9,$0,.Loop0 + sd $8,-8($4) + +.L0: beq $6,$0,.Lend + nop + +.Loop: ld $3,8($5) + daddiu $4,$4,32 + daddiu $6,$6,-4 + dsrl $11,$10,$7 + dsll $12,$3,$13 + + ld $10,16($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-32($4) + dsll $9,$10,$13 + + ld $3,24($5) + dsrl $11,$10,$7 + or $8,$14,$9 + sd $8,-24($4) + dsll $12,$3,$13 + + ld $10,32($5) + dsrl $14,$3,$7 + or $8,$11,$12 + sd $8,-16($4) + dsll $9,$10,$13 + + daddiu $5,$5,32 + or $8,$14,$9 + bgtz $6,.Loop + sd $8,-8($4) + +.Lend: dsrl $8,$10,$7 + j $31 + sd $8,0($4) + .end _gcry_mpih_rshift + diff --git a/comm/third_party/libgcrypt/mpi/mips3/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/mips3/mpih-sub1.S new file mode 100644 index 0000000000..9fac674399 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mips3/mpih-sub1.S @@ -0,0 +1,125 @@ +/* mips3 sub_n -- Subtract two limb vectors of the same length > 0 and + * store difference in a third limb vector. + * + * Copyright (C) 1995, 1998, 1999, 2000, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (r4) + * mpi_ptr_t s1_ptr, (r5) + * mpi_ptr_t s2_ptr, (r6) + * mpi_size_t size) (r7) + */ + + + .text + .align 2 + .globl _gcry_mpih_sub_n + .ent _gcry_mpih_sub_n +_gcry_mpih_sub_n: + .set noreorder + .set nomacro + + ld $10,0($5) + ld $11,0($6) + + daddiu $7,$7,-1 + and $9,$7,4-1 # number of limbs in first loop + beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop + move $2,$0 + + dsubu $7,$7,$9 + +.Loop0: daddiu $9,$9,-1 + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + daddiu $5,$5,8 + daddiu $6,$6,8 + move $10,$12 + move $11,$13 + bne $9,$0,.Loop0 + daddiu $4,$4,8 + +.L0: beq $7,$0,.Lend + nop + +.Loop: daddiu $7,$7,-4 + + ld $12,8($5) + daddu $11,$11,$2 + ld $13,8($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + or $2,$2,$8 + + ld $10,16($5) + daddu $13,$13,$2 + ld $11,16($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,8($4) + or $2,$2,$8 + + ld $12,24($5) + daddu $11,$11,$2 + ld $13,24($6) + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,16($4) + or $2,$2,$8 + + ld $10,32($5) + daddu 
$13,$13,$2 + ld $11,32($6) + sltu $8,$13,$2 + dsubu $13,$12,$13 + sltu $2,$12,$13 + sd $13,24($4) + or $2,$2,$8 + + daddiu $5,$5,32 + daddiu $6,$6,32 + + bne $7,$0,.Loop + daddiu $4,$4,32 + +.Lend: daddu $11,$11,$2 + sltu $8,$11,$2 + dsubu $11,$10,$11 + sltu $2,$10,$11 + sd $11,0($4) + j $31 + or $2,$2,$8 + + .end _gcry_mpih_sub_n + diff --git a/comm/third_party/libgcrypt/mpi/mpi-add.c b/comm/third_party/libgcrypt/mpi/mpi-add.c new file mode 100644 index 0000000000..53f476e060 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-add.c @@ -0,0 +1,235 @@ +/* mpi-add.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mpi-internal.h" + + +/**************** + * Add the unsigned integer V to the mpi-integer U and store the + * result in W. 
U and V may be the same. + */ +void +_gcry_mpi_add_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v ) +{ + mpi_ptr_t wp, up; + mpi_size_t usize, wsize; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + wsign = 0; + + /* If not space for W (and possible carry), increase space. */ + wsize = usize + 1; + if( w->alloced < wsize ) + mpi_resize(w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->d; + wp = w->d; + + if( !usize ) { /* simple */ + wp[0] = v; + wsize = v? 1:0; + } + else if( !usign ) { /* mpi is not negative */ + mpi_limb_t cy; + cy = _gcry_mpih_add_1(wp, up, usize, v); + wp[usize] = cy; + wsize = usize + cy; + } + else { /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. */ + if( usize == 1 && up[0] < v ) { + wp[0] = v - up[0]; + wsize = 1; + } + else { + _gcry_mpih_sub_1(wp, up, usize, v); + /* Size can decrease with at most one limb. */ + wsize = usize - (wp[usize-1]==0); + wsign = 1; + } + } + + w->nlimbs = wsize; + w->sign = wsign; +} + + +void +_gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t usize, vsize, wsize; + int usign, vsign, wsign; + + if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */ + usize = v->nlimbs; + usign = v->sign; + vsize = u->nlimbs; + vsign = u->sign; + wsize = usize + 1; + RESIZE_IF_NEEDED(w, wsize); + /* These must be after realloc (u or v may be the same as w). */ + up = v->d; + vp = u->d; + } + else { + usize = u->nlimbs; + usign = u->sign; + vsize = v->nlimbs; + vsign = v->sign; + wsize = usize + 1; + RESIZE_IF_NEEDED(w, wsize); + /* These must be after realloc (u or v may be the same as w). 
*/ + up = u->d; + vp = v->d; + } + wp = w->d; + wsign = 0; + + if( !vsize ) { /* simple */ + MPN_COPY(wp, up, usize ); + wsize = usize; + wsign = usign; + } + else if( usign != vsign ) { /* different sign */ + /* This test is right since USIZE >= VSIZE */ + if( usize != vsize ) { + _gcry_mpih_sub(wp, up, usize, vp, vsize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + wsign = usign; + } + else if( _gcry_mpih_cmp(up, vp, usize) < 0 ) { + _gcry_mpih_sub_n(wp, vp, up, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if( !usign ) + wsign = 1; + } + else { + _gcry_mpih_sub_n(wp, up, vp, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if( usign ) + wsign = 1; + } + } + else { /* U and V have same sign. Add them. */ + mpi_limb_t cy = _gcry_mpih_add(wp, up, usize, vp, vsize); + wp[usize] = cy; + wsize = usize + cy; + if( usign ) + wsign = 1; + } + + w->nlimbs = wsize; + w->sign = wsign; +} + + +/**************** + * Subtract the unsigned integer V from the mpi-integer U and store the + * result in W. + */ +void +_gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v ) +{ + mpi_ptr_t wp, up; + mpi_size_t usize, wsize; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + wsign = 0; + + /* If not space for W (and possible carry), increase space. */ + wsize = usize + 1; + if( w->alloced < wsize ) + mpi_resize(w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->d; + wp = w->d; + + if( !usize ) { /* simple */ + wp[0] = v; + wsize = v? 1:0; + wsign = 1; + } + else if( usign ) { /* mpi and v are negative */ + mpi_limb_t cy; + cy = _gcry_mpih_add_1(wp, up, usize, v); + wp[usize] = cy; + wsize = usize + cy; + } + else { /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. */ + if( usize == 1 && up[0] < v ) { + wp[0] = v - up[0]; + wsize = 1; + wsign = 1; + } + else { + _gcry_mpih_sub_1(wp, up, usize, v); + /* Size can decrease with at most one limb. 
*/ + wsize = usize - (wp[usize-1]==0); + } + } + + w->nlimbs = wsize; + w->sign = wsign; +} + +void +_gcry_mpi_sub(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v) +{ + gcry_mpi_t vv = mpi_copy (v); + vv->sign = ! vv->sign; + mpi_add (w, u, vv); + mpi_free (vv); +} + + +void +_gcry_mpi_addm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m) +{ + mpi_add (w, u, v); + mpi_mod (w, w, m); +} + +void +_gcry_mpi_subm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m) +{ + mpi_sub (w, u, v); + mpi_mod (w, w, m); +} diff --git a/comm/third_party/libgcrypt/mpi/mpi-bit.c b/comm/third_party/libgcrypt/mpi/mpi-bit.c new file mode 100644 index 0000000000..e2170401e4 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-bit.c @@ -0,0 +1,411 @@ +/* mpi-bit.c - MPI bit level functions + * Copyright (C) 1998, 1999, 2001, 2002, 2006 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + + +#ifdef MPI_INTERNAL_NEED_CLZ_TAB +#ifdef __STDC__ +const +#endif +unsigned char +_gcry_clz_tab[] = +{ + 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +}; +#endif + + +#define A_LIMB_1 ((mpi_limb_t)1) + + +/**************** + * Sometimes we have MSL (most significant limbs) which are 0; + * this is for some reasons not good, so this function removes them. + */ +void +_gcry_mpi_normalize( gcry_mpi_t a ) +{ + if( mpi_is_opaque(a) ) + return; + + for( ; a->nlimbs && !a->d[a->nlimbs-1]; a->nlimbs-- ) + ; +} + + + +/**************** + * Return the number of bits in A. + */ +unsigned int +_gcry_mpi_get_nbits (gcry_mpi_t a) +{ + unsigned n; + + if( mpi_is_opaque(a) ) { + return a->sign; /* which holds the number of bits */ + } + + _gcry_mpi_normalize( a ); + if( a->nlimbs ) { + mpi_limb_t alimb = a->d[a->nlimbs-1]; + if( alimb ) + count_leading_zeros( n, alimb ); + else + n = BITS_PER_MPI_LIMB; + n = BITS_PER_MPI_LIMB - n + (a->nlimbs-1) * BITS_PER_MPI_LIMB; + } + else + n = 0; + return n; +} + + +/**************** + * Test whether bit N is set. 
+ */ +int +_gcry_mpi_test_bit( gcry_mpi_t a, unsigned int n ) +{ + unsigned int limbno, bitno; + mpi_limb_t limb; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if( limbno >= a->nlimbs ) + return 0; /* too far left: this is a 0 */ + limb = a->d[limbno]; + return (limb & (A_LIMB_1 << bitno))? 1: 0; +} + + +/**************** + * Set bit N of A. + */ +void +_gcry_mpi_set_bit( gcry_mpi_t a, unsigned int n ) +{ + unsigned int i, limbno, bitno; + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if ( limbno >= a->nlimbs ) + { + for (i=a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + mpi_resize (a, limbno+1 ); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (A_LIMB_1<<bitno); +} + +/**************** + * Set bit N of A. and clear all bits above + */ +void +_gcry_mpi_set_highbit( gcry_mpi_t a, unsigned int n ) +{ + unsigned int i, limbno, bitno; + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if ( limbno >= a->nlimbs ) + { + for (i=a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + mpi_resize (a, limbno+1 ); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (A_LIMB_1<<bitno); + for ( bitno++; bitno < BITS_PER_MPI_LIMB; bitno++ ) + a->d[limbno] &= ~(A_LIMB_1 << bitno); + a->nlimbs = limbno+1; +} + +/**************** + * clear bit N of A and all bits above + */ +void +_gcry_mpi_clear_highbit( gcry_mpi_t a, unsigned int n ) +{ + unsigned int limbno, bitno; + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if( limbno >= a->nlimbs ) + return; /* not allocated, therefore no need to clear bits :-) */ + + for( ; bitno < BITS_PER_MPI_LIMB; bitno++ ) + a->d[limbno] &= ~(A_LIMB_1 << bitno); + a->nlimbs = limbno+1; +} + +/**************** + * Clear bit N of A. 
+ */ +void +_gcry_mpi_clear_bit( gcry_mpi_t a, unsigned int n ) +{ + unsigned int limbno, bitno; + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return; /* Don't need to clear this bit, it's far too left. */ + a->d[limbno] &= ~(A_LIMB_1 << bitno); +} + + +/**************** + * Shift A by COUNT limbs to the right + * This is used only within the MPI library + */ +void +_gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count ) +{ + mpi_ptr_t ap = a->d; + mpi_size_t n = a->nlimbs; + unsigned int i; + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + + if (count >= n) + { + a->nlimbs = 0; + return; + } + + for( i = 0; i < n - count; i++ ) + ap[i] = ap[i+count]; + ap[i] = 0; + a->nlimbs -= count; +} + + +/* + * Shift A by N bits to the right. + */ +void +_gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) +{ + mpi_size_t xsize; + unsigned int i; + unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); + unsigned int nbits = (n%BITS_PER_MPI_LIMB); + + if (mpi_is_immutable (x)) + { + mpi_immutable_failed (); + return; + } + + if ( x == a ) + { + /* In-place operation. */ + if ( nlimbs >= x->nlimbs ) + { + x->nlimbs = 0; + return; + } + + if (nlimbs) + { + for (i=0; i < x->nlimbs - nlimbs; i++ ) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; + + } + if ( x->nlimbs && nbits ) + _gcry_mpih_rshift ( x->d, x->d, x->nlimbs, nbits ); + } + else if ( nlimbs ) + { + /* Copy and shift by more or equal bits than in a limb. 
*/ + xsize = a->nlimbs; + x->sign = a->sign; + RESIZE_IF_NEEDED (x, xsize); + x->nlimbs = xsize; + for (i=0; i < a->nlimbs; i++ ) + x->d[i] = a->d[i]; + x->nlimbs = i; + + if ( nlimbs >= x->nlimbs ) + { + x->nlimbs = 0; + return; + } + + if (nlimbs) + { + for (i=0; i < x->nlimbs - nlimbs; i++ ) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; + } + + if ( x->nlimbs && nbits ) + _gcry_mpih_rshift ( x->d, x->d, x->nlimbs, nbits ); + } + else + { + /* Copy and shift by less than bits in a limb. */ + xsize = a->nlimbs; + x->sign = a->sign; + RESIZE_IF_NEEDED (x, xsize); + x->nlimbs = xsize; + + if ( xsize ) + { + if (nbits ) + _gcry_mpih_rshift (x->d, a->d, x->nlimbs, nbits ); + else + { + /* The rshift helper function is not specified for + NBITS==0, thus we do a plain copy here. */ + for (i=0; i < x->nlimbs; i++ ) + x->d[i] = a->d[i]; + } + } + } + MPN_NORMALIZE (x->d, x->nlimbs); +} + + +/**************** + * Shift A by COUNT limbs to the left + * This is used only within the MPI library + */ +void +_gcry_mpi_lshift_limbs (gcry_mpi_t a, unsigned int count) +{ + mpi_ptr_t ap; + int n = a->nlimbs; + int i; + + if (!count || !n) + return; + + RESIZE_IF_NEEDED (a, n+count); + + ap = a->d; + for (i = n-1; i >= 0; i--) + ap[i+count] = ap[i]; + for (i=0; i < count; i++ ) + ap[i] = 0; + a->nlimbs += count; +} + + +/* + * Shift A by N bits to the left. + */ +void +_gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) +{ + unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); + unsigned int nbits = (n%BITS_PER_MPI_LIMB); + + if (mpi_is_immutable (x)) + { + mpi_immutable_failed (); + return; + } + + if (x == a && !n) + return; /* In-place shift with an amount of zero. */ + + if ( x != a ) + { + /* Copy A to X. 
*/ + unsigned int alimbs = a->nlimbs; + int asign = a->sign; + mpi_ptr_t xp, ap; + + RESIZE_IF_NEEDED (x, alimbs+nlimbs+1); + xp = x->d; + ap = a->d; + MPN_COPY (xp, ap, alimbs); + x->nlimbs = alimbs; + x->flags = a->flags; + x->sign = asign; + } + + if (nlimbs && !nbits) + { + /* Shift a full number of limbs. */ + _gcry_mpi_lshift_limbs (x, nlimbs); + } + else if (n) + { + /* We use a very dump approach: Shift left by the number of + limbs plus one and than fix it up by an rshift. */ + _gcry_mpi_lshift_limbs (x, nlimbs+1); + mpi_rshift (x, x, BITS_PER_MPI_LIMB - nbits); + } + + MPN_NORMALIZE (x->d, x->nlimbs); +} diff --git a/comm/third_party/libgcrypt/mpi/mpi-cmp.c b/comm/third_party/libgcrypt/mpi/mpi-cmp.c new file mode 100644 index 0000000000..8927fa0ecb --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-cmp.c @@ -0,0 +1,130 @@ +/* mpi-cmp.c - MPI functions + * Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +int +_gcry_mpi_cmp_ui (gcry_mpi_t u, unsigned long v) +{ + mpi_limb_t limb = v; + + _gcry_mpi_normalize (u); + + /* Handle the case that U contains no limb. 
*/ + if (u->nlimbs == 0) + return -(limb != 0); + + /* Handle the case that U is negative. */ + if (u->sign) + return -1; + + if (u->nlimbs == 1) + { + /* Handle the case that U contains exactly one limb. */ + + if (u->d[0] > limb) + return 1; + if (u->d[0] < limb) + return -1; + return 0; + } + else + /* Handle the case that U contains more than one limb. */ + return 1; +} + + +/* Helper for _gcry_mpi_cmp and _gcry_mpi_cmpabs. */ +static int +do_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v, int absmode) +{ + mpi_size_t usize; + mpi_size_t vsize; + int usign; + int vsign; + int cmp; + + if (mpi_is_opaque (u) || mpi_is_opaque (v)) + { + /* We have no signan and thus ABSMODE has no efeect here. */ + if (mpi_is_opaque (u) && !mpi_is_opaque (v)) + return -1; + if (!mpi_is_opaque (u) && mpi_is_opaque (v)) + return 1; + if (!u->sign && !v->sign) + return 0; /* Empty buffers are identical. */ + if (u->sign < v->sign) + return -1; + if (u->sign > v->sign) + return 1; + return memcmp (u->d, v->d, (u->sign+7)/8); + } + else + { + _gcry_mpi_normalize (u); + _gcry_mpi_normalize (v); + + usize = u->nlimbs; + vsize = v->nlimbs; + usign = absmode? 0 : u->sign; + vsign = absmode? 0 : v->sign; + + /* Special treatment for +0 == -0 */ + if (!usize && !vsize) + return 0; + + /* Compare sign bits. */ + if (!usign && vsign) + return 1; + if (usign && !vsign) + return -1; + + /* U and V are either both positive or both negative. */ + + if (usize != vsize && !usign && !vsign) + return usize - vsize; + if (usize != vsize && usign && vsign) + return vsize + usize; + if (!usize ) + return 0; + if (!(cmp = _gcry_mpih_cmp (u->d, v->d, usize))) + return 0; + if ((cmp < 0?1:0) == (usign?1:0)) + return 1; + } + return -1; +} + + +int +_gcry_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v) +{ + return do_mpi_cmp (u, v, 0); +} + +/* Compare only the absolute values. 
*/ +int +_gcry_mpi_cmpabs (gcry_mpi_t u, gcry_mpi_t v) +{ + return do_mpi_cmp (u, v, 1); +} diff --git a/comm/third_party/libgcrypt/mpi/mpi-div.c b/comm/third_party/libgcrypt/mpi/mpi-div.c new file mode 100644 index 0000000000..166ab87519 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-div.c @@ -0,0 +1,360 @@ +/* mpi-div.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" +#include "g10lib.h" + + +void +_gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ) +{ + int divisor_sign = divisor->sign; + gcry_mpi_t temp_divisor = NULL; + + /* We need the original value of the divisor after the remainder has been + * preliminary calculated. 
We have to copy it to temporary space if it's + * the same variable as REM. */ + if( rem == divisor ) { + temp_divisor = mpi_copy( divisor ); + divisor = temp_divisor; + } + + _gcry_mpi_tdiv_r( rem, dividend, divisor ); + + if( ((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs ) + mpi_add (rem, rem, divisor); + + if( temp_divisor ) + mpi_free(temp_divisor); +} + + + +/**************** + * Division rounding the quotient towards -infinity. + * The remainder gets the same sign as the denominator. + * rem is optional + */ + +unsigned long +_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, + unsigned long divisor ) +{ + mpi_limb_t rlimb; + + rlimb = _gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor ); + if( rlimb && dividend->sign ) + rlimb = divisor - rlimb; + + if( rem ) { + rem->d[0] = rlimb; + rem->nlimbs = rlimb? 1:0; + } + return rlimb; +} + + +void +_gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t divisor ) +{ + gcry_mpi_t tmp = mpi_alloc( mpi_get_nlimbs(quot) ); + _gcry_mpi_fdiv_qr( quot, tmp, dividend, divisor); + mpi_free(tmp); +} + +void +_gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ) +{ + int divisor_sign = divisor->sign; + gcry_mpi_t temp_divisor = NULL; + + if( quot == divisor || rem == divisor ) { + temp_divisor = mpi_copy( divisor ); + divisor = temp_divisor; + } + + _gcry_mpi_tdiv_qr( quot, rem, dividend, divisor ); + + if( (divisor_sign ^ dividend->sign) && rem->nlimbs ) { + mpi_sub_ui( quot, quot, 1 ); + mpi_add( rem, rem, divisor); + } + + if( temp_divisor ) + mpi_free(temp_divisor); +} + + +/* If den == quot, den needs temporary storage. + * If den == rem, den needs temporary storage. + * If num == quot, num needs temporary storage. + * If den has temporary storage, it can be normalized while being copied, + * i.e no extra storage should be allocated. 
+ */ + +void +_gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den) +{ + _gcry_mpi_tdiv_qr(NULL, rem, num, den ); +} + +void +_gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den) +{ + mpi_ptr_t np, dp; + mpi_ptr_t qp, rp; + mpi_size_t nsize = num->nlimbs; + mpi_size_t dsize = den->nlimbs; + mpi_size_t qsize, rsize; + mpi_size_t sign_remainder = num->sign; + mpi_size_t sign_quotient = num->sign ^ den->sign; + unsigned normalization_steps; + mpi_limb_t q_limb; + mpi_ptr_t marker[5]; + unsigned int marker_nlimbs[5]; + int markidx=0; + + /* Ensure space is enough for quotient and remainder. + * We need space for an extra limb in the remainder, because it's + * up-shifted (normalized) below. */ + rsize = nsize + 1; + mpi_resize( rem, rsize); + + qsize = rsize - dsize; /* qsize cannot be bigger than this. */ + if( qsize <= 0 ) { + if( num != rem ) { + rem->nlimbs = num->nlimbs; + rem->sign = num->sign; + MPN_COPY(rem->d, num->d, nsize); + } + if( quot ) { + /* This needs to follow the assignment to rem, in case the + * numerator and quotient are the same. */ + quot->nlimbs = 0; + quot->sign = 0; + } + return; + } + + if( quot ) + mpi_resize( quot, qsize); + + if (!dsize) + _gcry_divide_by_zero(); + + /* Read pointers here, when reallocation is finished. */ + np = num->d; + dp = den->d; + rp = rem->d; + + /* Optimize division by a single-limb divisor. */ + if( dsize == 1 ) { + mpi_limb_t rlimb; + if( quot ) { + qp = quot->d; + rlimb = _gcry_mpih_divmod_1( qp, np, nsize, dp[0] ); + qsize -= qp[qsize - 1] == 0; + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + else + rlimb = _gcry_mpih_mod_1( np, nsize, dp[0] ); + rp[0] = rlimb; + rsize = rlimb != 0?1:0; + rem->nlimbs = rsize; + rem->sign = sign_remainder; + return; + } + + + if( quot ) { + qp = quot->d; + /* Make sure QP and NP point to different objects. Otherwise the + * numerator would be gradually overwritten by the quotient limbs. 
*/ + if(qp == np) { /* Copy NP object to temporary space. */ + marker_nlimbs[markidx] = nsize; + np = marker[markidx++] = mpi_alloc_limb_space(nsize, + mpi_is_secure(quot)); + MPN_COPY(np, qp, nsize); + } + } + else /* Put quotient at top of remainder. */ + qp = rp + dsize; + + count_leading_zeros( normalization_steps, dp[dsize - 1] ); + + /* Normalize the denominator, i.e. make its most significant bit set by + * shifting it NORMALIZATION_STEPS bits to the left. Also shift the + * numerator the same number of steps (to keep the quotient the same!). + */ + if( normalization_steps ) { + mpi_ptr_t tp; + mpi_limb_t nlimb; + + /* Shift up the denominator setting the most significant bit of + * the most significant word. Use temporary storage not to clobber + * the original contents of the denominator. */ + marker_nlimbs[markidx] = dsize; + tp = marker[markidx++] = mpi_alloc_limb_space(dsize,mpi_is_secure(den)); + _gcry_mpih_lshift( tp, dp, dsize, normalization_steps ); + dp = tp; + + /* Shift up the numerator, possibly introducing a new most + * significant word. Move the shifted numerator in the remainder + * meanwhile. */ + nlimb = _gcry_mpih_lshift(rp, np, nsize, normalization_steps); + if( nlimb ) { + rp[nsize] = nlimb; + rsize = nsize + 1; + } + else + rsize = nsize; + } + else { + /* The denominator is already normalized, as required. Copy it to + * temporary space if it overlaps with the quotient or remainder. */ + if( dp == rp || (quot && (dp == qp))) { + mpi_ptr_t tp; + + marker_nlimbs[markidx] = dsize; + tp = marker[markidx++] = mpi_alloc_limb_space(dsize, + mpi_is_secure(den)); + MPN_COPY( tp, dp, dsize ); + dp = tp; + } + + /* Move the numerator to the remainder. 
*/ + if( rp != np ) + MPN_COPY(rp, np, nsize); + + rsize = nsize; + } + + q_limb = _gcry_mpih_divrem( qp, 0, rp, rsize, dp, dsize ); + + if( quot ) { + qsize = rsize - dsize; + if(q_limb) { + qp[qsize] = q_limb; + qsize += 1; + } + + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + + rsize = dsize; + MPN_NORMALIZE (rp, rsize); + + if( normalization_steps && rsize ) { + _gcry_mpih_rshift(rp, rp, rsize, normalization_steps); + rsize -= rp[rsize - 1] == 0?1:0; + } + + rem->nlimbs = rsize; + rem->sign = sign_remainder; + while( markidx ) + { + markidx--; + _gcry_mpi_free_limb_space (marker[markidx], marker_nlimbs[markidx]); + } +} + +void +_gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned int count ) +{ + mpi_size_t usize, wsize; + mpi_size_t limb_cnt; + + usize = u->nlimbs; + limb_cnt = count / BITS_PER_MPI_LIMB; + wsize = usize - limb_cnt; + if( limb_cnt >= usize ) + w->nlimbs = 0; + else { + mpi_ptr_t wp; + mpi_ptr_t up; + + RESIZE_IF_NEEDED( w, wsize ); + wp = w->d; + up = u->d; + + count %= BITS_PER_MPI_LIMB; + if( count ) { + _gcry_mpih_rshift( wp, up + limb_cnt, wsize, count ); + wsize -= !wp[wsize - 1]; + } + else { + MPN_COPY_INCR( wp, up + limb_cnt, wsize); + } + + w->nlimbs = wsize; + } +} + +/**************** + * Check whether dividend is divisible by divisor + * (note: divisor must fit into a limb) + */ +int +_gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ) +{ + return !_gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor ); +} + + +void +_gcry_mpi_div (gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, + gcry_mpi_t divisor, int round) +{ + if (!round) + { + if (!rem) + { + gcry_mpi_t tmp = mpi_alloc (mpi_get_nlimbs(quot)); + _gcry_mpi_tdiv_qr (quot, tmp, dividend, divisor); + mpi_free (tmp); + } + else + _gcry_mpi_tdiv_qr (quot, rem, dividend, divisor); + } + else if (round < 0) + { + if (!rem) + _gcry_mpi_fdiv_q (quot, dividend, divisor); + else if (!quot) + _gcry_mpi_fdiv_r (rem, dividend, divisor); + else + 
 _gcry_mpi_fdiv_qr (quot, rem, dividend, divisor);
+    }
+  else
+    log_bug ("mpi rounding to ceiling not yet implemented\n");
+}
diff --git a/comm/third_party/libgcrypt/mpi/mpi-gcd.c b/comm/third_party/libgcrypt/mpi/mpi-gcd.c
new file mode 100644
index 0000000000..77ca05a6fc
--- /dev/null
+++ b/comm/third_party/libgcrypt/mpi/mpi-gcd.c
@@ -0,0 +1,52 @@
+/* mpi-gcd.c - MPI functions
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+/****************
+ * Find the greatest common divisor of XA and XB and store it in G.
+ * The computation runs on private copies of XA and XB whose sign
+ * flags have been cleared; G doubles as scratch space inside the loop.
+ * Return: true if the GCD is 1, false in all other cases
+ */
+int
+_gcry_mpi_gcd (gcry_mpi_t g, gcry_mpi_t xa, gcry_mpi_t xb)
+{
+  gcry_mpi_t a, b;
+
+  a = mpi_copy(xa);
+  b = mpi_copy(xb);
+
+  /* TAOCP Vol II, 4.5.2, Algorithm A */
+  a->sign = 0;
+  b->sign = 0;
+  while (mpi_cmp_ui (b, 0))
+    {
+      _gcry_mpi_fdiv_r( g, a, b ); /* G is used as temporary variable. 
*/ + mpi_set(a,b); + mpi_set(b,g); + } + mpi_set(g, a); + + mpi_free(a); + mpi_free(b); + return !mpi_cmp_ui( g, 1); +} diff --git a/comm/third_party/libgcrypt/mpi/mpi-inline.c b/comm/third_party/libgcrypt/mpi/mpi-inline.c new file mode 100644 index 0000000000..39e2222479 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-inline.c @@ -0,0 +1,35 @@ +/* mpi-inline.c + * Copyright (C) 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +/* put the inline functions as real functions into the lib */ +#define G10_MPI_INLINE_DECL + +#include "mpi-internal.h" + +/* always include the header because it is only + * included by mpi-internal if __GCC__ is defined but we + * need it here in all cases and the above definition of + * of the macro allows us to do so + */ +#include "mpi-inline.h" diff --git a/comm/third_party/libgcrypt/mpi/mpi-inline.h b/comm/third_party/libgcrypt/mpi/mpi-inline.h new file mode 100644 index 0000000000..94e2aec8a1 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-inline.h @@ -0,0 +1,161 @@ +/* mpi-inline.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996, 1998, 1999, + * 2001, 2002 Free 
Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#ifndef G10_MPI_INLINE_H +#define G10_MPI_INLINE_H + +/* Starting with gcc 4.3 "extern inline" conforms in c99 mode to the + c99 semantics. To keep the useful old semantics we use an + attribute. 
 */
+#ifndef G10_MPI_INLINE_DECL
+# ifdef __GNUC_STDC_INLINE__
+# define G10_MPI_INLINE_DECL extern inline __attribute__ ((__gnu_inline__))
+# else
+# define G10_MPI_INLINE_DECL extern __inline__
+# endif
+#endif
+/* Add the single limb S2_LIMB to the S1_SIZE limbs at S1_PTR and store
+ * the result at RES_PTR.  Returns 1 if the carry ran past the last
+ * limb, 0 otherwise.  The first limb is read unconditionally, so
+ * S1_SIZE is expected to be at least 1.  RES_PTR may be the same
+ * pointer as S1_PTR; if it is not, the limbs that are not affected by
+ * the carry are copied over.  */
+G10_MPI_INLINE_DECL mpi_limb_t
+_gcry_mpih_add_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+               mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+    mpi_limb_t x;
+
+    x = *s1_ptr++;
+    s2_limb += x;
+    *res_ptr++ = s2_limb;
+    if( s2_limb < x ) { /* sum is less than the left operand: handle carry */
+        while( --s1_size ) {
+            x = *s1_ptr++ + 1;  /* add carry */
+            *res_ptr++ = x;     /* and store */
+            if( x )             /* not 0 (no overflow): we can stop */
+                goto leave;
+        }
+        return 1; /* return carry (size of s1 too small) */
+    }
+
+  leave:
+    if( res_ptr != s1_ptr ) { /* not the same variable */
+        mpi_size_t i;          /* copy the rest */
+        for( i=0; i < s1_size-1; i++ )
+            res_ptr[i] = s1_ptr[i];
+    }
+    return 0; /* no carry */
+}
+
+
+/* Add the S2_SIZE limbs at S2_PTR to the S1_SIZE limbs at S1_PTR and
+ * store the result at RES_PTR; returns the carry.  The low S2_SIZE
+ * limbs are added with _gcry_mpih_add_n, the carry is then propagated
+ * through the remaining limbs of S1 with _gcry_mpih_add_1.
+ * NOTE(review): the pointer arithmetic assumes S1_SIZE >= S2_SIZE. */
+G10_MPI_INLINE_DECL mpi_limb_t
+_gcry_mpih_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+               mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+    mpi_limb_t cy = 0;
+
+    if( s2_size )
+        cy = _gcry_mpih_add_n( res_ptr, s1_ptr, s2_ptr, s2_size );
+
+    if( s1_size - s2_size )
+        cy = _gcry_mpih_add_1( res_ptr + s2_size, s1_ptr + s2_size,
+                            s1_size - s2_size, cy);
+    return cy;
+}
+
+/* Subtract the single limb S2_LIMB from the S1_SIZE limbs at S1_PTR
+ * and store the result at RES_PTR.  Returns 1 if the borrow ran past
+ * the last limb, 0 otherwise.  Mirrors _gcry_mpih_add_1.  */
+G10_MPI_INLINE_DECL mpi_limb_t
+_gcry_mpih_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_size_t s1_size, mpi_limb_t s2_limb )
+{
+    mpi_limb_t x;
+
+    x = *s1_ptr++;
+    s2_limb = x - s2_limb;
+    *res_ptr++ = s2_limb;
+    if( s2_limb > x ) { /* difference wrapped around: handle borrow */
+        while( --s1_size ) {
+            x = *s1_ptr++;
+            *res_ptr++ = x - 1;
+            if( x ) /* limb was not 0: the borrow stops here */
+                goto leave;
+        }
+        return 1;
+    }
+
+  leave:
+    if( res_ptr != s1_ptr ) {
+        mpi_size_t i;
+        for( i=0; i < s1_size-1; i++ )
+            res_ptr[i] = s1_ptr[i];
+    }
+    return 0;
+}
+
+
+/* Subtract the S2_SIZE limbs at S2_PTR from the S1_SIZE limbs at
+ * S1_PTR and store the result at RES_PTR; returns the borrow.
+ * NOTE(review): the pointer arithmetic assumes S1_SIZE >= S2_SIZE. */
+G10_MPI_INLINE_DECL mpi_limb_t
+_gcry_mpih_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+                mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+    mpi_limb_t cy = 0;
+
+    if( s2_size )
+        cy = _gcry_mpih_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+    if( s1_size - s2_size )
+        cy = _gcry_mpih_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+                              s1_size - s2_size, cy);
+    return cy;
+}
+
+/****************
+ * Compare the SIZE limbs at OP1_PTR with the SIZE limbs at OP2_PTR,
+ * starting with the limb at the highest index.  Both operands are
+ * taken to have the same number of limbs.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+G10_MPI_INLINE_DECL int
+_gcry_mpih_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size )
+{
+    mpi_size_t i;
+    mpi_limb_t op1_word, op2_word;
+
+    for( i = size - 1; i >= 0 ; i--) {
+        op1_word = op1_ptr[i];
+        op2_word = op2_ptr[i];
+        if( op1_word != op2_word )
+            goto diff;
+    }
+    return 0;
+
+  diff:
+    /* This can *not* be simplified to
+     *   op1_word - op2_word
+     * since that expression might give signed overflow.  */
+    return (op1_word > op2_word) ? 1 : -1;
+}
+
+
+#endif /*G10_MPI_INLINE_H*/
diff --git a/comm/third_party/libgcrypt/mpi/mpi-internal.h b/comm/third_party/libgcrypt/mpi/mpi-internal.h
new file mode 100644
index 0000000000..8ccdeada55
--- /dev/null
+++ b/comm/third_party/libgcrypt/mpi/mpi-internal.h
@@ -0,0 +1,300 @@
+/* mpi-internal.h - Internal to the Multi Precision Integers
+ * Copyright (C) 1994, 1996, 1998, 2000, 2002,
+ * 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#ifndef G10_MPI_INTERNAL_H +#define G10_MPI_INTERNAL_H + +#include "mpi-asm-defs.h" + +#ifndef BITS_PER_MPI_LIMB +#if BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_INT + typedef unsigned int mpi_limb_t; + typedef signed int mpi_limb_signed_t; +#elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG + typedef unsigned long int mpi_limb_t; + typedef signed long int mpi_limb_signed_t; +#elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG_LONG + typedef unsigned long long int mpi_limb_t; + typedef signed long long int mpi_limb_signed_t; +#elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_SHORT + typedef unsigned short int mpi_limb_t; + typedef signed short int mpi_limb_signed_t; +#else +#error BYTES_PER_MPI_LIMB does not match any C type +#endif +#define BITS_PER_MPI_LIMB (8*BYTES_PER_MPI_LIMB) +#endif /*BITS_PER_MPI_LIMB*/ + +#include "mpi.h" + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines. */ + +/* tested 4, 16, 32 and 64, where 16 gave the best performance when + * checking a 768 and a 1024 bit ElGamal signature. + * (wk 22.12.97) */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 16 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. 
*/ +#if KARATSUBA_THRESHOLD < 2 +#undef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 2 +#endif + + +typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ +typedef int mpi_size_t; /* (must be a signed type) */ + +#define ABS(x) (x >= 0 ? x : -x) +#define MIN(l,o) ((l) < (o) ? (l) : (o)) +#define MAX(h,i) ((h) > (i) ? (h) : (i)) +#define RESIZE_IF_NEEDED(a,b) \ + do { \ + if( (a)->alloced < (b) ) \ + mpi_resize((a), (b)); \ + } while(0) + +/* Copy N limbs from S to D. */ +#define MPN_COPY( d, s, n) \ + do { \ + mpi_size_t _i; \ + for( _i = 0; _i < (n); _i++ ) \ + (d)[_i] = (s)[_i]; \ + } while(0) + +#define MPN_COPY_INCR( d, s, n) \ + do { \ + mpi_size_t _i; \ + for( _i = 0; _i < (n); _i++ ) \ + (d)[_i] = (s)[_i]; \ + } while (0) + +#define MPN_COPY_DECR( d, s, n ) \ + do { \ + mpi_size_t _i; \ + for( _i = (n)-1; _i >= 0; _i--) \ + (d)[_i] = (s)[_i]; \ + } while(0) + +/* Zero N limbs at D */ +#define MPN_ZERO(d, n) \ + do { \ + int _i; \ + for( _i = 0; _i < (n); _i++ ) \ + (d)[_i] = 0; \ + } while (0) + +#define MPN_NORMALIZE(d, n) \ + do { \ + while( (n) > 0 ) { \ + if( (d)[(n)-1] ) \ + break; \ + (n)--; \ + } \ + } while(0) + +#define MPN_NORMALIZE_NOT_ZERO(d, n) \ + do { \ + for(;;) { \ + if( (d)[(n)-1] ) \ + break; \ + (n)--; \ + } \ + } while(0) + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if( (size) < KARATSUBA_THRESHOLD ) \ + mul_n_basecase (prodp, up, vp, size); \ + else \ + mul_n (prodp, up, vp, size, tspace); \ + } while (0); + + +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + * If this would yield overflow, DI should be the largest possible number + * (i.e., only ones). For correct operation, the most significant bit of D + * has to be set. Put the quotient in Q and the remainder in R. 
+ */ +#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ + do { \ + mpi_limb_t _ql GCC_ATTR_UNUSED; \ + mpi_limb_t _q, _r; \ + mpi_limb_t _xh, _xl; \ + umul_ppmm (_q, _ql, (nh), (di)); \ + _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ + umul_ppmm (_xh, _xl, _q, (d)); \ + sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \ + if( _xh ) { \ + sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + if( _xh) { \ + sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + } \ + } \ + if( _r >= (d) ) { \ + _r -= (d); \ + _q++; \ + } \ + (r) = _r; \ + (q) = _q; \ + } while (0) + + +/*-- mpiutil.c --*/ +#define mpi_alloc_limb_space(n,f) _gcry_mpi_alloc_limb_space((n),(f)) +mpi_ptr_t _gcry_mpi_alloc_limb_space( unsigned nlimbs, int sec ); +void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs ); +void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned nlimbs ); + +/*-- mpi-bit.c --*/ +#define mpi_rshift_limbs(a,n) _gcry_mpi_rshift_limbs ((a), (n)) +#define mpi_lshift_limbs(a,n) _gcry_mpi_lshift_limbs ((a), (n)) + +void _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count ); +void _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count ); + + +/*-- mpih-add.c --*/ +mpi_limb_t _gcry_mpih_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb ); +mpi_limb_t _gcry_mpih_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t _gcry_mpih_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpih-sub.c --*/ +mpi_limb_t _gcry_mpih_sub_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb ); +mpi_limb_t _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t _gcry_mpih_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpih-cmp.c --*/ +int _gcry_mpih_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t 
size ); + +/*-- mpih-mul.c --*/ + +struct karatsuba_ctx { + struct karatsuba_ctx *next; + mpi_ptr_t tspace; + unsigned int tspace_nlimbs; + mpi_size_t tspace_size; + mpi_ptr_t tp; + unsigned int tp_nlimbs; + mpi_size_t tp_size; +}; + +void _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx ); + +mpi_limb_t _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +void _gcry_mpih_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t size); +mpi_limb_t _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize); +void _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ); +void _gcry_mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, + mpi_ptr_t tspace); + +void _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx ); + + +/*-- mpih-mul_1.c (or xxx/cpu/ *.S) --*/ +mpi_limb_t _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); + +/*-- mpih-div.c --*/ +mpi_limb_t _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); +mpi_limb_t _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs, + mpi_ptr_t np, mpi_size_t nsize, + mpi_ptr_t dp, mpi_size_t dsize); +mpi_limb_t _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); + +/*-- mpih-shift.c --*/ +mpi_limb_t _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); +mpi_limb_t _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); + +/*-- mpih-const-time.c --*/ +#define mpih_set_cond(w,u,s,o) _gcry_mpih_set_cond ((w),(u),(s),(o)) +#define mpih_add_n_cond(w,u,v,s,o) _gcry_mpih_add_n_cond 
((w),(u),(v),(s),(o)) +#define mpih_sub_n_cond(w,u,v,s,o) _gcry_mpih_sub_n_cond ((w),(u),(v),(s),(o)) +#define mpih_swap_cond(u,v,s,o) _gcry_mpih_swap_cond ((u),(v),(s),(o)) +#define mpih_abs_cond(w,u,s,o) _gcry_mpih_abs_cond ((w),(u),(s),(o)) +#define mpih_mod(v,vs,u,us) _gcry_mpih_mod ((v),(vs),(u),(us)) + +void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned long op_enable); +mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t usize, unsigned long op_enable); +mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t usize, unsigned long op_enable); +void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize, + unsigned long op_enable); +void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up, + mpi_size_t usize, unsigned long op_enable); +mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize, + mpi_ptr_t up, mpi_size_t usize); +int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v); + + +/* Define stuff for longlong.h. */ +#define W_TYPE_SIZE BITS_PER_MPI_LIMB + typedef mpi_limb_t UWtype; + typedef unsigned int UHWtype; +#if defined (__GNUC__) + typedef unsigned int UQItype __attribute__ ((mode (QI))); + typedef int SItype __attribute__ ((mode (SI))); + typedef unsigned int USItype __attribute__ ((mode (SI))); + typedef int DItype __attribute__ ((mode (DI))); + typedef unsigned int UDItype __attribute__ ((mode (DI))); +#else + typedef unsigned char UQItype; + typedef long SItype; + typedef unsigned long USItype; +#endif + +#ifdef __GNUC__ +#include "mpi-inline.h" +#endif + +#endif /*G10_MPI_INTERNAL_H*/ diff --git a/comm/third_party/libgcrypt/mpi/mpi-inv.c b/comm/third_party/libgcrypt/mpi/mpi-inv.c new file mode 100644 index 0000000000..7ce874666d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-inv.c @@ -0,0 +1,565 @@ +/* mpi-inv.c - MPI functions + * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. 
+ * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "g10lib.h" + +/* + * This uses a modular inversion algorithm designed by Niels Möller + * which was implemented in Nettle. The same algorithm was later also + * adapted to GMP in mpn_sec_invert. + * + * For the description of the algorithm, see Algorithm 5 in Appendix A + * of "Fast Software Polynomial Multiplication on ARM Processors using + * the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio + * López, and Ricardo Dahab: + * https://hal.inria.fr/hal-01506572/document + * + * Note that in the reference above, at the line 2 of Algorithm 5, + * initial value of V was described as V:=1 wrongly. It must be V:=0. 
 */
/*
 * Compute the inverse of A modulo N on raw limb arrays of NSIZE limbs.
 * The callers in this file only pass an odd N.
 *
 * AP is used as working storage and is overwritten.  NP is only read.
 * Returns a newly allocated array of NSIZE limbs holding the inverse
 * (to be released by the caller), or NULL when the value left in B
 * after the loop is not 1, i.e. no inverse exists.
 *
 * The loop count depends only on NSIZE, and every value-dependent step
 * is expressed through the mpih_*_cond helpers.
 */
static mpi_ptr_t
mpih_invm_odd (mpi_ptr_t ap, mpi_ptr_t np, mpi_size_t nsize)
{
  int secure;
  unsigned int iterations;
  mpi_ptr_t n1hp;
  mpi_ptr_t bp;
  mpi_ptr_t up, vp;

  /* U := 1 and V := 0; both inherit the secure-memory status of A.  */
  secure = _gcry_is_secure (ap);
  up = mpi_alloc_limb_space (nsize, secure);
  MPN_ZERO (up, nsize);
  up[0] = 1;

  vp = mpi_alloc_limb_space (nsize, secure);
  MPN_ZERO (vp, nsize);

  /* B := N; B and N1H inherit the secure-memory status of N.  */
  secure = _gcry_is_secure (np);
  bp = mpi_alloc_limb_space (nsize, secure);
  MPN_COPY (bp, np, nsize);

  /* N1H := (N >> 1) + 1, which equals (N + 1) / 2 for odd N.  */
  n1hp = mpi_alloc_limb_space (nsize, secure);
  MPN_COPY (n1hp, np, nsize);
  _gcry_mpih_rshift (n1hp, n1hp, nsize, 1);
  _gcry_mpih_add_1 (n1hp, n1hp, nsize, 1);

  iterations = 2 * nsize * BITS_PER_MPI_LIMB;

  while (iterations-- > 0)
    {
      mpi_limb_t odd_a, odd_u, underflow, borrow;

      odd_a = ap[0] & 1;

      /* If A is odd: A -= B.  On underflow undo the effect on the
         pair: B += A (restoring the old A into B), A := |A| and
         swap U with V.  */
      underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a);
      mpih_add_n_cond (bp, bp, ap, nsize, underflow);
      mpih_abs_cond (ap, ap, nsize, underflow);
      mpih_swap_cond (up, vp, nsize, underflow);

      /* A is even at this point; halve it.  */
      _gcry_mpih_rshift (ap, ap, nsize, 1);

      /* If A was odd: U -= V, adding N back when that borrowed.  */
      borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a);
      mpih_add_n_cond (up, up, np, nsize, borrow);

      /* Halve U modulo N: shift right and, if a set bit was shifted
         out (non-zero return value of the shift), add (N + 1) / 2.  */
      odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0;
      mpih_add_n_cond (up, up, n1hp, nsize, odd_u);
    }

  _gcry_mpi_free_limb_space (n1hp, nsize);
  _gcry_mpi_free_limb_space (up, nsize);

  if (_gcry_mpih_cmp_ui (bp, nsize, 1) == 0)
    {
      /* Inverse exists.  */
      _gcry_mpi_free_limb_space (bp, nsize);
      return vp;
    }
  else
    {
      _gcry_mpi_free_limb_space (bp, nsize);
      _gcry_mpi_free_limb_space (vp, nsize);
      return NULL;
    }
}


/*
 * Calculate the multiplicative inverse X of A mod 2^K
 * A must be positive.
 *
 * See section 7 in "A New Algorithm for Inversion mod p^k" by Çetin
 * Kaya Koç: https://eprint.iacr.org/2017/411.pdf
 *
 * AP/ASIZE are only read.  Returns a newly allocated array of
 * ceil(K / BITS_PER_MPI_LIMB) limbs holding X (to be released by the
 * caller), or NULL when A is even and thus has no inverse mod 2^K.
 */
static mpi_ptr_t
mpih_invm_pow2 (mpi_ptr_t ap, mpi_size_t asize, unsigned int k)
{
  int secure = _gcry_is_secure (ap);
  mpi_size_t i;
  unsigned int iterations;
  mpi_ptr_t xp, wp, up, vp;
  mpi_size_t usize;

  if (!(ap[0] & 1))
    return NULL;

  /* Round K up to a whole number of limbs; USIZE is that limb count.  */
  iterations = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB)
    * BITS_PER_MPI_LIMB;
  usize = iterations / BITS_PER_MPI_LIMB;

  /* U := 1.  */
  up = mpi_alloc_limb_space (usize, secure);
  MPN_ZERO (up, usize);
  up[0] = 1;

  /* V := A mod 2^K: copy at most USIZE limbs, zero-pad, then clear
     the bits at and above position K in the top limb.  */
  vp = mpi_alloc_limb_space (usize, secure);
  for (i = 0; i < (usize < asize ? usize : asize); i++)
    vp[i] = ap[i];
  for (; i < usize; i++)
    vp[i] = 0;
  if ((k % BITS_PER_MPI_LIMB))
    for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
      vp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

  /* W is scratch space for U - V.  */
  wp = mpi_alloc_limb_space (usize, secure);
  MPN_COPY (wp, up, usize);

  /* X collects the result one bit per iteration.  */
  xp = mpi_alloc_limb_space (usize, secure);
  MPN_ZERO (xp, usize);

  /*
   * It can be considered that overflow at _gcry_mpih_sub_n results
   * adding 2^(USIZE*BITS_PER_MPI_LIMB), which is no problem in modulo
   * 2^K computation.
   */
  for (i = 0; i < iterations; i++)
    {
      /* Bit I of X is the current low bit of U.  */
      int b0 = (up[0] & 1);

      xp[i/BITS_PER_MPI_LIMB] |= ((mpi_limb_t)b0<<(i%BITS_PER_MPI_LIMB));
      /* Always compute W = U - V, but only keep it when B0 is set.  */
      _gcry_mpih_sub_n (wp, up, vp, usize);
      mpih_set_cond (up, wp, usize, b0);
      _gcry_mpih_rshift (up, up, usize, 1);
    }

  /* Reduce X mod 2^K by clearing the surplus bits of the top limb.  */
  if ((k % BITS_PER_MPI_LIMB))
    for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
      xp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

  _gcry_mpi_free_limb_space (up, usize);
  _gcry_mpi_free_limb_space (vp, usize);
  _gcry_mpi_free_limb_space (wp, usize);

  return xp;
}


/****************
 * Calculate the multiplicative inverse X of A mod N
 * That is: Find the solution x for
 *		1 = (a*x) mod n
 */
static int
mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
{
    int is_gcd_one;
#if 0
    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) */
    gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;

    u = mpi_copy(a);
    v = mpi_copy(n);
    u1 = mpi_alloc_set_ui(1);
    u2 = mpi_alloc_set_ui(0);
    u3 = mpi_copy(u);
    v1 = mpi_alloc_set_ui(0);
    v2 = mpi_alloc_set_ui(1);
    v3 = mpi_copy(v);
    q = mpi_alloc( mpi_get_nlimbs(u)+1 );
    t1 = mpi_alloc( mpi_get_nlimbs(u)+1 );
    t2 = mpi_alloc( mpi_get_nlimbs(u)+1 );
    t3 = mpi_alloc( mpi_get_nlimbs(u)+1 );
    while( mpi_cmp_ui( v3, 0 ) ) {
        mpi_fdiv_q( q, u3, v3 );
        mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
        mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
        mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
        mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
    }
    /* log_debug("result:\n");
       log_mpidump("q =", q );
       log_mpidump("u1=", u1);
       log_mpidump("u2=", u2);
       log_mpidump("u3=", u3);
       log_mpidump("v1=", v1);
       log_mpidump("v2=", v2); */
    mpi_set(x, u1);

    is_gcd_one = (mpi_cmp_ui (u3, 1) == 0);

    mpi_free(u1);
    mpi_free(u2);
    mpi_free(u3);
    mpi_free(v1);
    mpi_free(v2);
    mpi_free(v3);
    mpi_free(q);
    mpi_free(t1);
    mpi_free(t2);
    mpi_free(t3);
    mpi_free(u);
    mpi_free(v);
#elif 0
    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
     * modified according to Michael Penk's solution for Exercise 35
     * (in the first edition)
     * In the third edition, it's Exercise 39, and it is described in
     * page 646 of ANSWERS TO EXERCISES chapter.
     */

    /* FIXME: we can simplify this in most cases (see Knuth) */
    gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
    unsigned k;
    int sign;

    u = mpi_copy(a);
    v = mpi_copy(n);
    for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
        mpi_rshift(u, u, 1);
        mpi_rshift(v, v, 1);
    }


    u1 = mpi_alloc_set_ui(1);
    u2 = mpi_alloc_set_ui(0);
    u3 = mpi_copy(u);
    v1 = mpi_copy(v); /* !-- used as const 1 */
    v2 = mpi_alloc( mpi_get_nlimbs(u) ); mpi_sub( v2, u1, u );
    v3 = mpi_copy(v);
    if( mpi_test_bit(u, 0) ) { /* u is odd */
        t1 = mpi_alloc_set_ui(0);
        t2 = mpi_alloc_set_ui(1); t2->sign = 1;
        t3 = mpi_copy(v); t3->sign = !t3->sign;
        goto Y4;
    }
    else {
        t1 = mpi_alloc_set_ui(1);
        t2 = mpi_alloc_set_ui(0);
        t3 = mpi_copy(u);
    }
    do {
        do {
            if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
                mpi_add(t1, t1, v);
                mpi_sub(t2, t2, u);
            }
            mpi_rshift(t1, t1, 1);
            mpi_rshift(t2, t2, 1);
            mpi_rshift(t3, t3, 1);
          Y4:
            ;
        } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */

        if( !t3->sign ) {
            mpi_set(u1, t1);
            mpi_set(u2, t2);
            mpi_set(u3, t3);
        }
        else {
            mpi_sub(v1, v, t1);
            sign = u->sign; u->sign = !u->sign;
            mpi_sub(v2, u, t2);
            u->sign = sign;
            sign = t3->sign; t3->sign = !t3->sign;
            mpi_set(v3, t3);
            t3->sign = sign;
        }
        mpi_sub(t1, u1, v1);
        mpi_sub(t2, u2, v2);
        mpi_sub(t3, u3, v3);
        if( t1->sign ) {
            mpi_add(t1, t1, v);
            mpi_sub(t2, t2, u);
        }
    } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
    /* mpi_lshift( u3, u3, k ); */
    is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
    mpi_set(x, u1);

    mpi_free(u1);
    mpi_free(u2);
    mpi_free(u3);
    mpi_free(v1);
    mpi_free(v2);
    mpi_free(v3);
    mpi_free(t1);
    mpi_free(t2);
    mpi_free(t3);
#else
    /* This is the only branch that is compiled; the two variants above
     * are disabled with "#if 0" / "#elif 0".  */
    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
     * modified according to Michael Penk's solution for Exercise 35
     * with further enhancement */
    /* The reference in the comment above is for the first edition.
     * In the third edition, it's Exercise 39, and it is described in
     * page 646 of ANSWERS TO EXERCISES chapter.
     */
    gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
    unsigned k;
    int sign;
    int odd ;

    /* Work on private copies so that A and N are left untouched.  */
    u = mpi_copy(a);
    v = mpi_copy(n);

    /* Strip the common factors of two; K counts them.  */
    for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
        mpi_rshift(u, u, 1);
        mpi_rshift(v, v, 1);
    }
    /* When V is odd the U2/V2/T2 column is never allocated or used.  */
    odd = mpi_test_bit(v,0);

    u1 = mpi_alloc_set_ui(1);
    if( !odd )
        u2 = mpi_alloc_set_ui(0);
    u3 = mpi_copy(u);
    v1 = mpi_copy(v);
    if( !odd ) {
        v2 = mpi_alloc( mpi_get_nlimbs(u) );
        mpi_sub( v2, u1, u ); /* U is used as const 1 */
    }
    v3 = mpi_copy(v);
    if( mpi_test_bit(u, 0) ) { /* u is odd */
        t1 = mpi_alloc_set_ui(0);
        if( !odd ) {
            t2 = mpi_alloc_set_ui(1); t2->sign = 1;
        }
        t3 = mpi_copy(v); t3->sign = !t3->sign;
        /* Enter the inner loop at its test, skipping the first halving.  */
        goto Y4;
    }
    else {
        t1 = mpi_alloc_set_ui(1);
        if( !odd )
            t2 = mpi_alloc_set_ui(0);
        t3 = mpi_copy(u);
    }
    do {
        do {
            if( !odd ) {
                if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
                    mpi_add(t1, t1, v);
                    mpi_sub(t2, t2, u);
                }
                mpi_rshift(t1, t1, 1);
                mpi_rshift(t2, t2, 1);
                mpi_rshift(t3, t3, 1);
            }
            else {
                if( mpi_test_bit(t1, 0) )
                    mpi_add(t1, t1, v);
                mpi_rshift(t1, t1, 1);
                mpi_rshift(t3, t3, 1);
            }
          Y4:
            ;
        } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */

        if( !t3->sign ) {
            mpi_set(u1, t1);
            if( !odd )
                mpi_set(u2, t2);
            mpi_set(u3, t3);
        }
        else {
            mpi_sub(v1, v, t1);
            /* Temporarily flip the sign of U to compute V2 = -U - T2.  */
            sign = u->sign; u->sign = !u->sign;
            if( !odd )
                mpi_sub(v2, u, t2);
            u->sign = sign;
            /* Likewise V3 = -T3, restoring the sign of T3 afterwards.  */
            sign = t3->sign; t3->sign = !t3->sign;
            mpi_set(v3, t3);
            t3->sign = sign;
        }
        mpi_sub(t1, u1, v1);
        if( !odd )
            mpi_sub(t2, u2, v2);
        mpi_sub(t3, u3, v3);
        if( t1->sign ) {
            mpi_add(t1, t1, v);
            if( !odd )
                mpi_sub(t2, t2, u);
        }
    } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
    /* mpi_lshift( u3, u3, k ); */
    /* The inverse exists only if no common factor of two was stripped
     * and the remaining U3 is 1.  */
    is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
    mpi_set(x, u1);

    mpi_free(u1);
    mpi_free(v1);
    mpi_free(t1);
    if( !odd ) {
        mpi_free(u2);
        mpi_free(v2);
        mpi_free(t2);
    }
    mpi_free(u3);
    mpi_free(v3);
    mpi_free(t3);

    mpi_free(u);
    mpi_free(v);
#endif
    return is_gcd_one;
}


/*
 * Set X to the multiplicative inverse of A mod N.  Return true if the
 * inverse exists.
 *
 * Three strategies are used depending on N: the limb-level routine
 * for odd moduli when N is odd; for even N with A and N both
 * non-negative, the power-of-two routine, combined with the odd one
 * when N is not itself a power of two; otherwise mpi_invm_generic.
 */
int
_gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
{
  mpi_ptr_t ap, xp;

  if (!mpi_cmp_ui (a, 0))
    return 0; /* Inverse does not exist.  */
  if (!mpi_cmp_ui (n, 1))
    return 0; /* Inverse does not exist.  */

  if (mpi_test_bit (n, 0))
    {
      /* N is odd.  Build a scratch copy of A that is exactly
         N->nlimbs long: zero-padded, or reduced mod N if longer.  */
      if (a->nlimbs <= n->nlimbs)
        {
          ap = mpi_alloc_limb_space (n->nlimbs, _gcry_is_secure (a->d));
          MPN_ZERO (ap, n->nlimbs);
          MPN_COPY (ap, a->d, a->nlimbs);
        }
      else
        ap = _gcry_mpih_mod (a->d, a->nlimbs, n->d, n->nlimbs);

      xp = mpih_invm_odd (ap, n->d, n->nlimbs);
      _gcry_mpi_free_limb_space (ap, n->nlimbs);

      if (xp)
        {
          _gcry_mpi_assign_limb_space (x, xp, n->nlimbs);
          x->nlimbs = n->nlimbs;
          return 1;
        }
      else
        return 0; /* Inverse does not exist.  */
    }
  else if (!a->sign && !n->sign)
    {
      /* N is even and both operands are non-negative.  K is the
         number of trailing zero bits of N.  */
      unsigned int k = mpi_trailing_zeros (n);
      mpi_size_t x1size = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
      mpi_size_t hsize;
      gcry_mpi_t q;
      mpi_ptr_t x1p, x2p, q_invp, hp, diffp;
      mpi_size_t i;

      /* N has only its top bit set, i.e. N == 2^K.  */
      if (k == _gcry_mpi_get_nbits (n) - 1)
        {
          x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);

          if (x1p)
            {
              _gcry_mpi_assign_limb_space (x, x1p, x1size);
              x->nlimbs = x1size;
              return 1;
            }
          else
            return 0; /* Inverse does not exist.  */
        }

      /* N can be expressed as P * Q, where P = 2^K.  P and Q are coprime.
       */
      /*
       * Compute X1 = invm (A, P) and X2 = invm (A, Q), and combine
       * them by Garner's formula, to get X = invm (A, P*Q).
       * A special case of Chinese Remainder Theorem.
       */

      /* X1 = invm (A, P) */
      x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
      if (!x1p)
        return 0;               /* Inverse does not exist.  */

      /* Q = N / P */
      q = mpi_new (0);
      mpi_rshift (q, n, k);

      /* X2 = invm (A%Q, Q) */
      ap = _gcry_mpih_mod (a->d, a->nlimbs, q->d, q->nlimbs);
      x2p = mpih_invm_odd (ap, q->d, q->nlimbs);
      _gcry_mpi_free_limb_space (ap, q->nlimbs);
      if (!x2p)
        {
          _gcry_mpi_free_limb_space (x1p, x1size);
          mpi_free (q);
          return 0;             /* Inverse does not exist.  */
        }

      /* Q_inv = Q^(-1) = invm (Q, P) */
      q_invp = mpih_invm_pow2 (q->d, q->nlimbs, k);

      /* H = (X1 - X2) * Q_inv % P */
      diffp = mpi_alloc_limb_space (x1size, _gcry_is_secure (a->d));
      if (x1size >= q->nlimbs)
        _gcry_mpih_sub (diffp, x1p, x1size, x2p, q->nlimbs);
      else
        _gcry_mpih_sub_n (diffp, x1p, x2p, x1size);
      _gcry_mpi_free_limb_space (x1p, x1size);
      /* Reduce the difference mod P = 2^K: clear bits >= K in the top limb.  */
      if ((k % BITS_PER_MPI_LIMB))
        for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
          diffp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

      hsize = x1size * 2;
      hp = mpi_alloc_limb_space (hsize, _gcry_is_secure (a->d));
      _gcry_mpih_mul_n (hp, diffp, q_invp, x1size);
      _gcry_mpi_free_limb_space (diffp, x1size);
      _gcry_mpi_free_limb_space (q_invp, x1size);

      /* Reduce the product mod P: drop the upper half, then clear the
         bits >= K in the top remaining limb.  */
      for (i = x1size; i < hsize; i++)
        hp[i] = 0;
      if ((k % BITS_PER_MPI_LIMB))
        for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
          hp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);

      /* X = H * Q + X2.  The longer operand is passed first to
         _gcry_mpih_mul.  */
      xp = mpi_alloc_limb_space (x1size + q->nlimbs, _gcry_is_secure (a->d));
      if (x1size >= q->nlimbs)
        _gcry_mpih_mul (xp, hp, x1size, q->d, q->nlimbs);
      else
        _gcry_mpih_mul (xp, q->d, q->nlimbs, hp, x1size);

      _gcry_mpi_free_limb_space (hp, hsize);

      _gcry_mpih_add (xp, xp, x1size + q->nlimbs, x2p, q->nlimbs);
      _gcry_mpi_free_limb_space (x2p, q->nlimbs);

      _gcry_mpi_assign_limb_space (x, xp, x1size + q->nlimbs);
      x->nlimbs = x1size + q->nlimbs;

      mpi_free (q);

      return 1;
    }
  else
    return mpi_invm_generic (x, a, n);
}
diff --git a/comm/third_party/libgcrypt/mpi/mpi-mod.c b/comm/third_party/libgcrypt/mpi/mpi-mod.c new file mode 100644 index 0000000000..88624720c2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-mod.c @@ -0,0 +1,188 @@
/* mpi-mod.c  - Modular reduction
   Copyright (C) 1998, 1999, 2001, 2002, 2003,
                 2007  Free Software Foundation, Inc.

   This file is part of Libgcrypt.

   Libgcrypt is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   Libgcrypt is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.  */


#include <config.h>
#include <stdio.h>
#include <stdlib.h>

#include "mpi-internal.h"
#include "longlong.h"
#include "g10lib.h"


/* Context used with Barrett reduction.  */
struct barrett_ctx_s
{
  gcry_mpi_t m;   /* The modulus - may not be modified. */
  int m_copied;   /* If true, M needs to be released.  */
  int k;          /* Number of limbs of M.  */
  gcry_mpi_t y;   /* Precomputed floor(b^(2k) / m).  */
  gcry_mpi_t r1;  /* Helper MPI.  */
  gcry_mpi_t r2;  /* Helper MPI.  */
  gcry_mpi_t r3;  /* Helper MPI allocated on demand. */
};



/* REM = DIVIDEND mod DIVISOR.  This simply forwards to
   _gcry_mpi_fdiv_r.  */
void
_gcry_mpi_mod (gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor)
{
  _gcry_mpi_fdiv_r (rem, dividend, divisor);
}


/* This function returns a new context for Barrett based operations on
   the modulus M.
This context needs to be released using + _gcry_mpi_barrett_free. If COPY is true M will be transferred to + the context and the user may change M. If COPY is false, M may not + be changed until gcry_mpi_barrett_free has been called. */ +mpi_barrett_t +_gcry_mpi_barrett_init (gcry_mpi_t m, int copy) +{ + mpi_barrett_t ctx; + gcry_mpi_t tmp; + + mpi_normalize (m); + ctx = xcalloc (1, sizeof *ctx); + + if (copy) + { + ctx->m = mpi_copy (m); + ctx->m_copied = 1; + } + else + ctx->m = m; + + ctx->k = mpi_get_nlimbs (m); + tmp = mpi_alloc (ctx->k + 1); + + /* Barrett precalculation: y = floor(b^(2k) / m). */ + mpi_set_ui (tmp, 1); + mpi_lshift_limbs (tmp, 2 * ctx->k); + mpi_fdiv_q (tmp, tmp, m); + + ctx->y = tmp; + ctx->r1 = mpi_alloc ( 2 * ctx->k + 1 ); + ctx->r2 = mpi_alloc ( 2 * ctx->k + 1 ); + + return ctx; +} + +void +_gcry_mpi_barrett_free (mpi_barrett_t ctx) +{ + if (ctx) + { + mpi_free (ctx->y); + mpi_free (ctx->r1); + mpi_free (ctx->r2); + if (ctx->r3) + mpi_free (ctx->r3); + if (ctx->m_copied) + mpi_free (ctx->m); + xfree (ctx); + } +} + + +/* R = X mod M + + Using Barrett reduction. Before using this function + _gcry_mpi_barrett_init must have been called to do the + precalculations. CTX is the context created by this precalculation + and also conveys M. If the Barret reduction could no be done a + straightforward reduction method is used. + + We assume that these conditions are met: + Input: x =(x_2k-1 ...x_0)_b + m =(m_k-1 ....m_0)_b with m_k-1 != 0 + Output: r = x mod m + */ +void +_gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, mpi_barrett_t ctx) +{ + gcry_mpi_t m = ctx->m; + int k = ctx->k; + gcry_mpi_t y = ctx->y; + gcry_mpi_t r1 = ctx->r1; + gcry_mpi_t r2 = ctx->r2; + int sign; + + mpi_normalize (x); + if (mpi_get_nlimbs (x) > 2*k ) + { + mpi_mod (r, x, m); + return; + } + + sign = x->sign; + x->sign = 0; + + /* 1. 
q1 = floor( x / b^k-1) + * q2 = q1 * y + * q3 = floor( q2 / b^k+1 ) + * Actually, we don't need qx, we can work direct on r2 + */ + mpi_set ( r2, x ); + mpi_rshift_limbs ( r2, k-1 ); + mpi_mul ( r2, r2, y ); + mpi_rshift_limbs ( r2, k+1 ); + + /* 2. r1 = x mod b^k+1 + * r2 = q3 * m mod b^k+1 + * r = r1 - r2 + * 3. if r < 0 then r = r + b^k+1 + */ + mpi_set ( r1, x ); + if ( r1->nlimbs > k+1 ) /* Quick modulo operation. */ + r1->nlimbs = k+1; + mpi_mul ( r2, r2, m ); + if ( r2->nlimbs > k+1 ) /* Quick modulo operation. */ + r2->nlimbs = k+1; + mpi_sub ( r, r1, r2 ); + + if ( mpi_has_sign ( r ) ) + { + if (!ctx->r3) + { + ctx->r3 = mpi_alloc ( k + 2 ); + mpi_set_ui (ctx->r3, 1); + mpi_lshift_limbs (ctx->r3, k + 1 ); + } + mpi_add ( r, r, ctx->r3 ); + } + + /* 4. while r >= m do r = r - m */ + while ( mpi_cmp( r, m ) >= 0 ) + mpi_sub ( r, r, m ); + + x->sign = sign; +} + + +void +_gcry_mpi_mul_barrett (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, + mpi_barrett_t ctx) +{ + mpi_mul (w, u, v); + mpi_mod_barrett (w, w, ctx); +} diff --git a/comm/third_party/libgcrypt/mpi/mpi-mpow.c b/comm/third_party/libgcrypt/mpi/mpi-mpow.c new file mode 100644 index 0000000000..43bd641fb5 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-mpow.c @@ -0,0 +1,223 @@ +/* mpi-mpow.c - MPI functions + * Copyright (C) 1998, 1999, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mpi-internal.h" +#include "longlong.h" +#include "g10lib.h" + + +/* Barrett is slower than the classical way. It can be tweaked by + * using partial multiplications + */ +/*#define USE_BARRETT*/ + + + +#ifdef USE_BARRETT +static void barrett_mulm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2 ); +static gcry_mpi_t init_barrett( gcry_mpi_t m, int *k, gcry_mpi_t *r1, gcry_mpi_t *r2 ); +static int calc_barrett( gcry_mpi_t r, gcry_mpi_t x, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2 ); +#else +#define barrett_mulm( w, u, v, m, y, k, r1, r2 ) _gcry_mpi_mulm( (w), (u), (v), (m) ) +#endif + + +static int +build_index( gcry_mpi_t *exparray, int k, int i, int t ) +{ + int j, bitno; + int idx = 0; + + bitno = t-i; + for(j=k-1; j >= 0; j-- ) { + idx <<= 1; + if( mpi_test_bit( exparray[j], bitno ) ) + idx |= 1; + } + /*log_debug("t=%d i=%d idx=%d\n", t, i, idx );*/ + return idx; +} + +/**************** + * RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... 
* mod M + */ +void +_gcry_mpi_mulpowm( gcry_mpi_t res, gcry_mpi_t *basearray, gcry_mpi_t *exparray, gcry_mpi_t m) +{ + int k; /* number of elements */ + int t; /* bit size of largest exponent */ + int i, j, idx; + gcry_mpi_t *G; /* table with precomputed values of size 2^k */ + gcry_mpi_t tmp; +#ifdef USE_BARRETT + gcry_mpi_t barrett_y, barrett_r1, barrett_r2; + int barrett_k; +#endif + + for(k=0; basearray[k]; k++ ) + ; + gcry_assert(k); + for(t=0, i=0; (tmp=exparray[i]); i++ ) { + /*log_mpidump("exp: ", tmp );*/ + j = mpi_get_nbits(tmp); + if( j > t ) + t = j; + } + /*log_mpidump("mod: ", m );*/ + gcry_assert (i==k); + gcry_assert (t); + gcry_assert (k < 10); + + G = xcalloc( (1<<k) , sizeof *G ); +#ifdef USE_BARRETT + barrett_y = init_barrett( m, &barrett_k, &barrett_r1, &barrett_r2 ); +#endif + /* and calculate */ + tmp = mpi_alloc( mpi_get_nlimbs(m)+1 ); + mpi_set_ui( res, 1 ); + for(i = 1; i <= t; i++ ) { + barrett_mulm(tmp, res, res, m, barrett_y, barrett_k, + barrett_r1, barrett_r2 ); + idx = build_index( exparray, k, i, t ); + gcry_assert (idx >= 0 && idx < (1<<k)); + if( !G[idx] ) { + if( !idx ) + G[0] = mpi_alloc_set_ui( 1 ); + else { + for(j=0; j < k; j++ ) { + if( (idx & (1<<j) ) ) { + if( !G[idx] ) + G[idx] = mpi_copy( basearray[j] ); + else + barrett_mulm( G[idx], G[idx], basearray[j], + m, barrett_y, barrett_k, barrett_r1, barrett_r2 ); + } + } + if( !G[idx] ) + G[idx] = mpi_alloc(0); + } + } + barrett_mulm(res, tmp, G[idx], m, barrett_y, barrett_k, barrett_r1, barrett_r2 ); + } + + /* cleanup */ + mpi_free(tmp); +#ifdef USE_BARRETT + mpi_free(barrett_y); + mpi_free(barrett_r1); + mpi_free(barrett_r2); +#endif + for(i=0; i < (1<<k); i++ ) + mpi_free(G[i]); + xfree(G); +} + + + +#ifdef USE_BARRETT +static void +barrett_mulm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2 ) +{ + mpi_mul(w, u, v); + if( calc_barrett( w, w, m, y, k, r1, r2 ) ) + mpi_fdiv_r( w, w, m ); +} + +/**************** + * 
Barrett precalculation: y = floor(b^(2k) / m) + */ +static gcry_mpi_t +init_barrett( gcry_mpi_t m, int *k, gcry_mpi_t *r1, gcry_mpi_t *r2 ) +{ + gcry_mpi_t tmp; + + mpi_normalize( m ); + *k = mpi_get_nlimbs( m ); + tmp = mpi_alloc( *k + 1 ); + mpi_set_ui( tmp, 1 ); + mpi_lshift_limbs( tmp, 2 * *k ); + mpi_fdiv_q( tmp, tmp, m ); + *r1 = mpi_alloc( 2* *k + 1 ); + *r2 = mpi_alloc( 2* *k + 1 ); + return tmp; +} + +/**************** + * Barrett reduction: We assume that these conditions are met: + * Given x =(x_2k-1 ...x_0)_b + * m =(m_k-1 ....m_0)_b with m_k-1 != 0 + * Output r = x mod m + * Before using this function init_barret must be used to calucalte y and k. + * Returns: false = no error + * true = can't perform barret reduction + */ +static int +calc_barrett( gcry_mpi_t r, gcry_mpi_t x, gcry_mpi_t m, gcry_mpi_t y, int k, gcry_mpi_t r1, gcry_mpi_t r2 ) +{ + int xx = k > 3 ? k-3:0; + + mpi_normalize( x ); + if( mpi_get_nlimbs(x) > 2*k ) + return 1; /* can't do it */ + + /* 1. q1 = floor( x / b^k-1) + * q2 = q1 * y + * q3 = floor( q2 / b^k+1 ) + * Actually, we don't need qx, we can work direct on r2 + */ + mpi_set( r2, x ); + mpi_rshift_limbs( r2, k-1 ); + mpi_mul( r2, r2, y ); + mpi_rshift_limbs( r2, k+1 ); + + /* 2. r1 = x mod b^k+1 + * r2 = q3 * m mod b^k+1 + * r = r1 - r2 + * 3. if r < 0 then r = r + b^k+1 + */ + mpi_set( r1, x ); + if( r1->nlimbs > k+1 ) /* quick modulo operation */ + r1->nlimbs = k+1; + mpi_mul( r2, r2, m ); + if( r2->nlimbs > k+1 ) /* quick modulo operation */ + r2->nlimbs = k+1; + mpi_sub( r, r1, r2 ); + + if( mpi_has_sign (r) ) { + gcry_mpi_t tmp; + + tmp = mpi_alloc( k + 2 ); + mpi_set_ui( tmp, 1 ); + mpi_lshift_limbs( tmp, k+1 ); + mpi_add( r, r, tmp ); + mpi_free(tmp); + } + + /* 4. 
while r >= m do r = r - m */ + while( mpi_cmp( r, m ) >= 0 ) + mpi_sub( r, r, m ); + + return 0; +} +#endif /* USE_BARRETT */ diff --git a/comm/third_party/libgcrypt/mpi/mpi-mul.c b/comm/third_party/libgcrypt/mpi/mpi-mul.c new file mode 100644 index 0000000000..4f4d7096a7 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-mul.c @@ -0,0 +1,212 @@ +/* mpi-mul.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi-internal.h"


/* PROD = MULT * SMALL_MULT.  The result is zero if either factor is
 * zero; otherwise PROD takes the sign of MULT.  */
void
_gcry_mpi_mul_ui (gcry_mpi_t prod, gcry_mpi_t mult, unsigned long small_mult)
{
    mpi_size_t size, prod_size;
    mpi_ptr_t prod_ptr;
    mpi_limb_t cy;
    int sign;

    size = mult->nlimbs;
    sign = mult->sign;

    if( !size || !small_mult ) {
        prod->nlimbs = 0;
        prod->sign = 0;
        return;
    }

    /* One extra limb for a possible carry out of the top limb.  */
    prod_size = size + 1;
    if( prod->alloced < prod_size )
        mpi_resize( prod, prod_size );
    prod_ptr = prod->d;

    cy = _gcry_mpih_mul_1( prod_ptr, mult->d, size, (mpi_limb_t)small_mult );
    if( cy )
        prod_ptr[size++] = cy;
    prod->nlimbs = size;
    prod->sign = sign;
}


/* W = U * 2^CNT, i.e. U shifted left by CNT bits.  W takes the sign
 * of U.  */
void
_gcry_mpi_mul_2exp (gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt)
{
    mpi_size_t usize, wsize, limb_cnt;
    mpi_ptr_t wp;
    mpi_limb_t wlimb;
    int usign, wsign;

    usize = u->nlimbs;
    usign = u->sign;

    if( !usize ) {
        w->nlimbs = 0;
        w->sign = 0;
        return;
    }

    /* Split CNT into whole limbs and remaining bits; reserve one more
     * limb for bits shifted out of the top limb.  */
    limb_cnt = cnt / BITS_PER_MPI_LIMB;
    wsize = usize + limb_cnt + 1;
    if( w->alloced < wsize )
        mpi_resize(w, wsize );
    wp = w->d;
    wsize = usize + limb_cnt;
    wsign = usign;

    cnt %= BITS_PER_MPI_LIMB;
    if( cnt ) {
        wlimb = _gcry_mpih_lshift( wp + limb_cnt, u->d, usize, cnt );
        if( wlimb ) {
            wp[wsize] = wlimb;
            wsize++;
        }
    }
    else {
        MPN_COPY_DECR( wp + limb_cnt, u->d, usize );
    }

    /* Zero all whole limbs at low end.  Do it here and not before calling
     * mpn_lshift, not to lose for U == W.  */
    MPN_ZERO( wp, limb_cnt );

    w->nlimbs = wsize;
    w->sign = wsign;
}


/* W = U * V.  W may be the same MPI as U and/or V.  If U or V lives
 * in secure memory while W does not, the product is computed in a
 * temporary secure buffer and copied to normal memory afterwards.  */
void
_gcry_mpi_mul (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
{
    mpi_size_t usize, vsize, wsize;
    mpi_ptr_t up, vp, wp;
    mpi_limb_t cy;
    int usign, vsign, usecure, vsecure, sign_product;
    int assign_wp=0;
    mpi_ptr_t tmp_limb=NULL;
    unsigned int tmp_limb_nlimbs = 0;

    /* Arrange for the "U" operand to be the one with more limbs.  */
    if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */
        usize = v->nlimbs;
        usign = v->sign;
        usecure = mpi_is_secure(v);
        up = v->d;
        vsize = u->nlimbs;
        vsign = u->sign;
        vsecure = mpi_is_secure(u);
        vp = u->d;
    }
    else {
        usize = u->nlimbs;
        usign = u->sign;
        usecure = mpi_is_secure(u);
        up = u->d;
        vsize = v->nlimbs;
        vsign = v->sign;
        vsecure = mpi_is_secure(v);
        vp = v->d;
    }
    sign_product = usign ^ vsign;
    wp = w->d;

    /* Ensure W has space enough to store the result.  */
    wsize = usize + vsize;
    if ( !mpi_is_secure (w) && (mpi_is_secure (u) || mpi_is_secure (v)) ) {
        /* w is not allocated in secure space but u or v is.  To make sure
         * that no temporary results are stored in w, we temporarily use
         * a newly allocated limb space for w */
        wp = mpi_alloc_limb_space( wsize, 1 );
        assign_wp = 2; /* mark it as 2 so that we can later copy it back to
                        * normal memory */
    }
    else if( w->alloced < wsize ) {
        if( wp == up || wp == vp ) {
            /* W aliases an input, so it cannot be resized in place.  */
            wp = mpi_alloc_limb_space( wsize, mpi_is_secure(w) );
            assign_wp = 1;
        }
        else {
            mpi_resize(w, wsize );
            wp = w->d;
        }
    }
    else { /* Make U and V not overlap with W.	*/
        if( wp == up ) {
            /* W and U are identical.  Allocate temporary space for U.	*/
            tmp_limb_nlimbs = usize;
            up = tmp_limb = mpi_alloc_limb_space( usize, usecure );
            /* Is V identical too?  Keep it identical with U.  */
            if( wp == vp )
                vp = up;
            /* Copy to the temporary space.  */
            MPN_COPY( up, wp, usize );
        }
        else if( wp == vp ) {
            /* W and V are identical.  Allocate temporary space for V.	*/
            tmp_limb_nlimbs = vsize;
            vp = tmp_limb = mpi_alloc_limb_space( vsize, vsecure );
            /* Copy to the temporary space.  */
            MPN_COPY( vp, wp, vsize );
        }
    }

    if( !vsize )
        wsize = 0;
    else {
        /* The return value is used to decide whether the top limb of
         * the product is in use: when it is zero the result is one
         * limb shorter.  */
        cy = _gcry_mpih_mul( wp, up, usize, vp, vsize );
        wsize -= cy? 0:1;
    }

    if( assign_wp ) {
        if (assign_wp == 2) {
            /* copy the temp wp from secure memory back to normal memory */
            mpi_ptr_t tmp_wp = mpi_alloc_limb_space (wsize, 0);
            MPN_COPY (tmp_wp, wp, wsize);
            _gcry_mpi_free_limb_space (wp, 0);
            wp = tmp_wp;
        }
        _gcry_mpi_assign_limb_space( w, wp, wsize );
    }
    w->nlimbs = wsize;
    w->sign = sign_product;
    if( tmp_limb )
        _gcry_mpi_free_limb_space (tmp_limb, tmp_limb_nlimbs);
}


/* W = U * V mod M, computed as a full product followed by
 * _gcry_mpi_tdiv_r.  */
void
_gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
{
    mpi_mul (w, u, v);
    _gcry_mpi_tdiv_r (w, w, m);
}
diff --git a/comm/third_party/libgcrypt/mpi/mpi-pow.c b/comm/third_party/libgcrypt/mpi/mpi-pow.c new file mode 100644 index 0000000000..62b4a80830 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpi-pow.c @@ -0,0 +1,772 @@
/* mpi-pow.c  -  MPI functions for exponentiation
 * Copyright (C) 1994, 1996, 1998, 2000, 2002
 *               2003  Free Software Foundation, Inc.
 *               2013  g10 Code GmbH
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing of sensitive data due to paging etc.
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mpi-internal.h" +#include "longlong.h" + + +/* + * When you need old implementation, please add compilation option + * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION + * or expose this line: +#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1 + */ + +#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION) +/**************** + * RES = BASE ^ EXPO mod MOD + */ +void +_gcry_mpi_powm (gcry_mpi_t res, + gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod) +{ + /* Pointer to the limbs of the arguments, their size and signs. */ + mpi_ptr_t rp, ep, mp, bp; + mpi_size_t esize, msize, bsize, rsize; + int msign, bsign, rsign; + /* Flags telling the secure allocation status of the arguments. */ + int esec, msec, bsec; + /* Size of the result including space for temporary values. */ + mpi_size_t size; + /* Helper. */ + int mod_shift_cnt; + int negative_result; + mpi_ptr_t mp_marker = NULL; + mpi_ptr_t bp_marker = NULL; + mpi_ptr_t ep_marker = NULL; + mpi_ptr_t xp_marker = NULL; + unsigned int mp_nlimbs = 0; + unsigned int bp_nlimbs = 0; + unsigned int ep_nlimbs = 0; + unsigned int xp_nlimbs = 0; + mpi_ptr_t tspace = NULL; + mpi_size_t tsize = 0; + + + esize = expo->nlimbs; + msize = mod->nlimbs; + size = 2 * msize; + msign = mod->sign; + + esec = mpi_is_secure(expo); + msec = mpi_is_secure(mod); + bsec = mpi_is_secure(base); + + rp = res->d; + ep = expo->d; + MPN_NORMALIZE(ep, esize); + + if (!msize) + _gcry_divide_by_zero(); + + if (!esize) + { + /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending + on if MOD equals 1. */ + res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + if (res->nlimbs) + { + RESIZE_IF_NEEDED (res, 1); + rp = res->d; + rp[0] = 1; + } + res->sign = 0; + goto leave; + } + + /* Normalize MOD (i.e. make its most significant bit set) as + required by mpn_divrem. 
This will make the intermediate values + in the calculation slightly larger, but the correct result is + obtained after a final reduction using the original MOD value. */ + mp_nlimbs = msec? msize:0; + mp = mp_marker = mpi_alloc_limb_space(msize, msec); + count_leading_zeros (mod_shift_cnt, mod->d[msize-1]); + if (mod_shift_cnt) + _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt); + else + MPN_COPY( mp, mod->d, msize ); + + bsize = base->nlimbs; + bsign = base->sign; + if (bsize > msize) + { + /* The base is larger than the module. Reduce it. + + Allocate (BSIZE + 1) with space for remainder and quotient. + (The quotient is (bsize - msize + 1) limbs.) */ + bp_nlimbs = bsec ? (bsize + 1):0; + bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec ); + MPN_COPY ( bp, base->d, bsize ); + /* We don't care about the quotient, store it above the + * remainder, at BP + MSIZE. */ + _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize ); + bsize = msize; + /* Canonicalize the base, since we are going to multiply with it + quite a few times. */ + MPN_NORMALIZE( bp, bsize ); + } + else + bp = base->d; + + if (!bsize) + { + res->nlimbs = 0; + res->sign = 0; + goto leave; + } + + + /* Make BASE, EXPO and MOD not overlap with RES. */ + if ( rp == bp ) + { + /* RES and BASE are identical. Allocate temp. space for BASE. */ + gcry_assert (!bp_marker); + bp_nlimbs = bsec? bsize:0; + bp = bp_marker = mpi_alloc_limb_space( bsize, bsec ); + MPN_COPY(bp, rp, bsize); + } + if ( rp == ep ) + { + /* RES and EXPO are identical. Allocate temp. space for EXPO. */ + ep_nlimbs = esec? esize:0; + ep = ep_marker = mpi_alloc_limb_space( esize, esec ); + MPN_COPY(ep, rp, esize); + } + if ( rp == mp ) + { + /* RES and MOD are identical. Allocate temporary space for MOD.*/ + gcry_assert (!mp_marker); + mp_nlimbs = msec?msize:0; + mp = mp_marker = mpi_alloc_limb_space( msize, msec ); + MPN_COPY(mp, rp, msize); + } + + /* Copy base to the result. 
*/ + if (res->alloced < size) + { + mpi_resize (res, size); + rp = res->d; + } + MPN_COPY ( rp, bp, bsize ); + rsize = bsize; + rsign = 0; + + /* Main processing. */ + { + mpi_size_t i; + mpi_ptr_t xp; + int c; + mpi_limb_t e; + mpi_limb_t carry_limb; + struct karatsuba_ctx karactx; + struct gcry_mpi w, u; + + xp_nlimbs = msec? size:0; + xp = xp_marker = mpi_alloc_limb_space( size, msec ); + + w.sign = u.sign = 0; + w.flags = u.flags = 0; + w.alloced = w.nlimbs = size; /* RES->alloc may be longer. */ + u.alloced = u.nlimbs = size; + + memset( &karactx, 0, sizeof karactx ); + negative_result = (ep[0] & 1) && bsign; + + i = esize - 1; + e = ep[i]; + count_leading_zeros (c, e); + e = (e << c) << 1; /* Shift the expo bits to the left, lose msb. */ + c = BITS_PER_MPI_LIMB - 1 - c; + + /* Main loop. + + Make the result be pointed to alternately by XP and RP. This + helps us avoid block copying, which would otherwise be + necessary with the overlap restrictions of + _gcry_mpih_divmod. With 50% probability the result after this + loop will be in the area originally pointed by RP (==RES->d), + and with 50% probability in the area originally pointed to by XP. 
*/ + for (;;) + { + while (c) + { + mpi_ptr_t tp; + mpi_size_t xsize; + + /*mpih_mul_n(xp, rp, rp, rsize);*/ + if ( rsize < KARATSUBA_THRESHOLD ) + _gcry_mpih_sqr_n_basecase( xp, rp, rsize ); + else + { + if ( !tspace ) + { + tsize = 2 * rsize; + tspace = mpi_alloc_limb_space( tsize, 0 ); + } + else if ( tsize < (2*rsize) ) + { + _gcry_mpi_free_limb_space (tspace, 0); + tsize = 2 * rsize; + tspace = mpi_alloc_limb_space (tsize, 0 ); + } + _gcry_mpih_sqr_n (xp, rp, rsize, tspace); + } + + xsize = 2 * rsize; + if ( xsize > msize ) + { + _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize); + xsize = msize; + } + + tp = rp; rp = xp; xp = tp; + rsize = xsize; + + /* To mitigate the Yarom/Falkner flush+reload cache + * side-channel attack on the RSA secret exponent, we do + * the multiplication regardless of the value of the + * high-bit of E. But to avoid this performance penalty + * we do it only if the exponent has been stored in secure + * memory and we can thus assume it is a secret exponent. */ + if (esec || (mpi_limb_signed_t)e < 0) + { + /*mpih_mul( xp, rp, rsize, bp, bsize );*/ + if( bsize < KARATSUBA_THRESHOLD ) + _gcry_mpih_mul ( xp, rp, rsize, bp, bsize ); + else + _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, bp, bsize, + &karactx); + + xsize = rsize + bsize; + if ( xsize > msize ) + { + _gcry_mpih_divrem(xp + msize, 0, xp, xsize, mp, msize); + xsize = msize; + } + } + + w.d = rp; + u.d = xp; + mpi_set_cond (&w, &u, ((mpi_limb_signed_t)e < 0)); + + e <<= 1; + c--; + } + + i--; + if ( i < 0 ) + break; + e = ep[i]; + c = BITS_PER_MPI_LIMB; + } + + /* We shifted MOD, the modulo reduction argument, left + MOD_SHIFT_CNT steps. Adjust the result by reducing it with the + original MOD. + + Also make sure the result is put in RES->d (where it already + might be, see above). 
*/ + if ( mod_shift_cnt ) + { + carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt); + rp = res->d; + if ( carry_limb ) + { + rp[rsize] = carry_limb; + rsize++; + } + } + else if (res->d != rp) + { + MPN_COPY (res->d, rp, rsize); + rp = res->d; + } + + if ( rsize >= msize ) + { + _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize); + rsize = msize; + } + + /* Remove any leading zero words from the result. */ + if ( mod_shift_cnt ) + _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt); + MPN_NORMALIZE (rp, rsize); + + _gcry_mpih_release_karatsuba_ctx (&karactx ); + } + + /* Fixup for negative results. */ + if ( negative_result && rsize ) + { + if ( mod_shift_cnt ) + _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt); + _gcry_mpih_sub( rp, mp, msize, rp, rsize); + rsize = msize; + rsign = msign; + MPN_NORMALIZE(rp, rsize); + } + gcry_assert (res->d == rp); + res->nlimbs = rsize; + res->sign = rsign; + + leave: + if (mp_marker) + _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs ); + if (bp_marker) + _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs ); + if (ep_marker) + _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs ); + if (xp_marker) + _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs ); + if (tspace) + _gcry_mpi_free_limb_space( tspace, 0 ); +} +#else +/** + * Internal function to compute + * + * X = R * S mod M + * + * and set the size of X at the pointer XSIZE_P. + * Use karatsuba structure at KARACTX_P. + * + * Condition: + * RSIZE >= SSIZE + * Enough space for X is allocated beforehand. + * + * For generic cases, we can/should use gcry_mpi_mulm. + * This function is use for specific internal case. 
/**
 * Internal function to compute
 *
 *    X = R * S mod M
 *
 * and set the size of X at the pointer XSIZE_P.
 * Use karatsuba structure at KARACTX_P.
 *
 * Condition:
 *   RSIZE >= SSIZE
 *   Enough space for X is allocated beforehand.
 *
 * The product is only reduced when RSIZE + SSIZE exceeds MSIZE; the
 * quotient of that division is stored above the remainder, at
 * XP + MSIZE, and is otherwise ignored.
 *
 * For generic cases, we can/should use gcry_mpi_mulm.
 * This function is used for a specific internal case.
 */
static void
mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
         mpi_ptr_t rp, mpi_size_t rsize,
         mpi_ptr_t sp, mpi_size_t ssize,
         mpi_ptr_t mp, mpi_size_t msize,
         struct karatsuba_ctx *karactx_p)
{
  if( ssize < KARATSUBA_THRESHOLD )
    _gcry_mpih_mul ( xp, rp, rsize, sp, ssize );
  else
    _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);

   if (rsize + ssize > msize)
    {
      _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
      *xsize_p = msize;
    }
   else
     *xsize_p = rsize + ssize;
}

/* Number of slots in the table of odd powers: 1 << (W - 1) entries
   are used, and the largest window width W selected below is 5.  */
#define SIZE_PRECOMP ((1 << (5 - 1)))

/****************
 * RES = BASE ^ EXPO mod MOD
 *
 * To mitigate the Yarom/Falkner flush+reload cache side-channel
 * attack on the RSA secret exponent, we don't use the square
 * routine but multiplication.
 *
 * RES may be the same MPI as BASE or EXPO; the operand is then
 * copied to temporary limb space first.  A zero MOD ends in
 * _gcry_divide_by_zero().
 *
 * Reference:
 *   Handbook of Applied Cryptography
 *       Algorithm 14.83: Modified left-to-right k-ary exponentiation
 */
void
_gcry_mpi_powm (gcry_mpi_t res,
                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
{
  /* Pointer to the limbs of the arguments, their size and signs. */
  mpi_ptr_t  rp, ep, mp, bp;
  mpi_size_t esize, msize, bsize, rsize;
  int               msign, bsign, rsign;
  /* Flags telling the secure allocation status of the arguments.  */
  int        esec,  msec,  bsec;
  /* Size of the result including space for temporary values.  */
  mpi_size_t size;
  /* Helper.  */
  int mod_shift_cnt;
  int negative_result;
  /* Temporaries to be released at LEAVE, with the limb count passed
     to _gcry_mpi_free_limb_space (0 when the operand is not secure).  */
  mpi_ptr_t mp_marker = NULL;
  mpi_ptr_t bp_marker = NULL;
  mpi_ptr_t ep_marker = NULL;
  mpi_ptr_t xp_marker = NULL;
  unsigned int mp_nlimbs = 0;
  unsigned int bp_nlimbs = 0;
  unsigned int ep_nlimbs = 0;
  unsigned int xp_nlimbs = 0;
  mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... */
  mpi_size_t precomp_size[SIZE_PRECOMP];
  mpi_size_t W;                    /* Window width in bits (1..5).  */
  mpi_ptr_t base_u;                /* Operand selected for the next multiply.  */
  mpi_size_t base_u_size;
  mpi_size_t max_u_size;           /* Allocation size of the final BASE_U.  */

  esize = expo->nlimbs;
  msize = mod->nlimbs;
  size = 2 * msize;
  msign = mod->sign;

  ep = expo->d;
  MPN_NORMALIZE(ep, esize);

  /* Choose the window width from the exponent length in bits (counted
     in whole limbs).  */
  if (esize * BITS_PER_MPI_LIMB > 512)
    W = 5;
  else if (esize * BITS_PER_MPI_LIMB > 256)
    W = 4;
  else if (esize * BITS_PER_MPI_LIMB > 128)
    W = 3;
  else if (esize * BITS_PER_MPI_LIMB > 64)
    W = 2;
  else
    W = 1;

  esec = mpi_is_secure(expo);
  msec = mpi_is_secure(mod);
  bsec = mpi_is_secure(base);

  rp = res->d;

  if (!msize)
    _gcry_divide_by_zero();

  if (!esize)
    {
      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
         on if MOD equals 1.  */
      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
      if (res->nlimbs)
        {
          RESIZE_IF_NEEDED (res, 1);
          rp = res->d;
          rp[0] = 1;
        }
      res->sign = 0;
      goto leave;
    }

  /* Normalize MOD (i.e. make its most significant bit set) as
     required by mpn_divrem.  This will make the intermediate values
     in the calculation slightly larger, but the correct result is
     obtained after a final reduction using the original MOD value. */
  mp_nlimbs = msec? msize:0;
  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
  if (mod_shift_cnt)
    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
  else
    MPN_COPY( mp, mod->d, msize );

  bsize = base->nlimbs;
  bsign = base->sign;
  if (bsize > msize)
    {
      /* The base is larger than the module.  Reduce it.

         Allocate (BSIZE + 1) with space for remainder and quotient.
         (The quotient is (bsize - msize + 1) limbs.)  */
      bp_nlimbs = bsec ? (bsize + 1):0;
      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
      MPN_COPY ( bp, base->d, bsize );
      /* We don't care about the quotient, store it above the
       * remainder, at BP + MSIZE.  */
      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
      bsize = msize;
      /* Canonicalize the base, since we are going to multiply with it
         quite a few times.  */
      MPN_NORMALIZE( bp, bsize );
    }
  else
    bp = base->d;

  if (!bsize)
    {
      /* Base is zero (possibly after reduction): the result is zero.  */
      res->nlimbs = 0;
      res->sign = 0;
      goto leave;
    }


  /* Make BASE, EXPO not overlap with RES.  We don't need to check MOD
     because that has already been copied to the MP var.  */
  if ( rp == bp )
    {
      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
      gcry_assert (!bp_marker);
      bp_nlimbs = bsec? bsize:0;
      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
      MPN_COPY(bp, rp, bsize);
    }
  if ( rp == ep )
    {
      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
      ep_nlimbs = esec? esize:0;
      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
      MPN_COPY(ep, rp, esize);
    }

  /* Make sure RES can hold SIZE limbs.  (The base itself is copied
     into RP further below, right before the main loop.)  */
  if (res->alloced < size)
    {
      mpi_resize (res, size);
      rp = res->d;
    }

  /* Main processing.  */
  {
    mpi_size_t i, j, k;
    mpi_ptr_t xp;
    mpi_size_t xsize;
    int c;
    mpi_limb_t e;
    mpi_limb_t carry_limb;
    struct karatsuba_ctx karactx;
    mpi_ptr_t tp;

    xp_nlimbs = msec? size:0;
    xp = xp_marker = mpi_alloc_limb_space( size, msec );

    memset( &karactx, 0, sizeof karactx );
    /* The result is negative only for a negative base raised to an
       odd exponent.  */
    negative_result = (ep[0] & 1) && bsign;

    /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
    if (W > 1)                  /* X := BASE^2 */
      mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
    /* XSIZE stays unset for W == 1, but the loop below then does not
       execute and so never reads it.  */
    base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
    base_u_size = max_u_size = precomp_size[0] = bsize;
    MPN_COPY (precomp[0], bp, bsize);
    for (i = 1; i < (1 << (W - 1)); i++)
      {                         /* PRECOMP[i] = BASE^(2 * i + 1) */
        /* mul_mod wants the longer operand first.  */
        if (xsize >= base_u_size)
          mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
                   mp, msize, &karactx);
        else
          mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
                   mp, msize, &karactx);
        base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
        base_u_size = precomp_size[i] = rsize;
        if (max_u_size < base_u_size)
          max_u_size = base_u_size;
        MPN_COPY (precomp[i], rp, rsize);
      }

    /* BASE_U now becomes a separate, zeroed buffer large enough for
       any table entry and for a value of MSIZE limbs.  */
    if (msize > max_u_size)
      max_u_size = msize;
    base_u = mpi_alloc_limb_space (max_u_size, esec);
    MPN_ZERO (base_u, max_u_size);

    i = esize - 1;

    /* Main loop.

       Make the result be pointed to alternately by XP and RP.  This
       helps us avoid block copying, which would otherwise be
       necessary with the overlap restrictions of
       _gcry_mpih_divmod. With 50% probability the result after this
       loop will be in the area originally pointed by RP (==RES->d),
       and with 50% probability in the area originally pointed to by XP. */
    rsign = 0;
    if (W == 1)
      {
        rsize = bsize;
      }
    else
      {
        /* Work with full MSIZE-limb operands, zero padded.  */
        rsize = msize;
        MPN_ZERO (rp, rsize);
      }
    MPN_COPY ( rp, bp, bsize );

    /* Drop the most significant set bit of the exponent; C counts the
       bits of E still to be processed.  */
    e = ep[i];
    count_leading_zeros (c, e);
    e = (e << c) << 1;
    c = BITS_PER_MPI_LIMB - 1 - c;

    /* J accumulates the number of multiplications of RP by itself
       that are still owed.  */
    j = 0;

    for (;;)
      if (e == 0)
        {
          /* Only zero bits left in this limb: account for them and
             fetch the next limb, if any.  */
          j += c;
          if ( --i < 0 )
            break;

          e = ep[i];
          c = BITS_PER_MPI_LIMB;
        }
      else
        {
          int c0;
          mpi_limb_t e0;
          /* W and U are stack-only views used as arguments for
             mpi_set_cond; W always targets BASE_U.  */
          struct gcry_mpi w, u;
          w.sign = u.sign = 0;
          w.flags = u.flags = 0;
          w.d = base_u;

          /* Skip the zero bits in front of the next set bit.  */
          count_leading_zeros (c0, e);
          e = (e << c0);
          c -= c0;
          j += c0;

          /* Extract a window of W bits into E0; it may straddle a
             limb boundary or be cut short at the end of the exponent.  */
          e0 = (e >> (BITS_PER_MPI_LIMB - W));
          if (c >= W)
            c0 = 0;
          else
            {
              if ( --i < 0 )
                {
                  e0 = (e >> (BITS_PER_MPI_LIMB - c));
                  j += c - W;
                  goto last_step;
                }
              else
                {
                  c0 = c;
                  e = ep[i];
                  c = BITS_PER_MPI_LIMB;
                  e0 |= (e >> (BITS_PER_MPI_LIMB - (W - c0)));
                }
            }

          e = e << (W - c0);
          c -= (W - c0);

        last_step:
          /* Strip the trailing zero bits of the window; the remaining
             odd value 2*E0+1 indexes the table as PRECOMP[E0].  */
          count_trailing_zeros (c0, e0);
          e0 = (e0 >> c0) >> 1;

          for (j += W - c0; j >= 0; j--)
            {

              /*
               *  base_u <= precomp[e0]
               *  base_u_size <= precomp_size[e0]
               *
               * Every table entry is passed through mpi_set_cond so
               * that the selection does not depend on an E0-indexed
               * load.
               */
              base_u_size = 0;
              for (k = 0; k < (1<< (W - 1)); k++)
                {
                  w.alloced = w.nlimbs = precomp_size[k];
                  u.alloced = u.nlimbs = precomp_size[k];
                  u.d = precomp[k];

                  mpi_set_cond (&w, &u, k == e0);
                  base_u_size |= ( precomp_size[k] & (0UL - (k == e0)) );
                }

              /* For every round but the last (J != 0) replace the
                 operand by RP itself, i.e. multiply RP by RP; in the
                 last round (J == 0) multiply by the table entry.  */
              w.alloced = w.nlimbs = rsize;
              u.alloced = u.nlimbs = rsize;
              u.d = rp;
              mpi_set_cond (&w, &u, j != 0);
              base_u_size ^= ((base_u_size ^ rsize) & (0UL - (j != 0)));

              mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
                       mp, msize, &karactx);
              tp = rp; rp = xp; xp = tp;
              rsize = xsize;
            }

          /* The stripped trailing zero bits are still owed.  */
          j = c0;
          if ( i < 0 )
            break;
        }

    /* Work off the owed multiplications of RP by itself.  */
    while (j--)
      {
        mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
        tp = rp; rp = xp; xp = tp;
        rsize = xsize;
      }

    /* We shifted MOD, the modulo reduction argument, left
       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
       original MOD.

       Also make sure the result is put in RES->d (where it already
       might be, see above).  */
    if ( mod_shift_cnt )
      {
        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
        rp = res->d;
        if ( carry_limb )
          {
            rp[rsize] = carry_limb;
            rsize++;
          }
      }
    else if (res->d != rp)
      {
        MPN_COPY (res->d, rp, rsize);
        rp = res->d;
      }

    if ( rsize >= msize )
      {
        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
        rsize = msize;
      }

    /* Remove any leading zero words from the result.  */
    if ( mod_shift_cnt )
      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
    MPN_NORMALIZE (rp, rsize);

    /* Release the per-call temporaries of this scope.  */
    _gcry_mpih_release_karatsuba_ctx (&karactx );
    for (i = 0; i < (1 << (W - 1)); i++)
      _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 );
    _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0);
  }

  /* Fixup for negative results.  */
  if ( negative_result && rsize )
    {
      /* Undo the normalization shift of MP, then RP = MOD - RP.  */
      if ( mod_shift_cnt )
        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
      rsize = msize;
      rsign = msign;
      MPN_NORMALIZE(rp, rsize);
    }
  gcry_assert (res->d == rp);
  res->nlimbs = rsize;
  res->sign = rsign;

 leave:
  if (mp_marker)
    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
  if (bp_marker)
    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
  if (ep_marker)
    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
  if (xp_marker)
    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
}
#endif
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + +/**************** + * Scan through an mpi and return byte for byte. a -1 is returned to indicate + * the end of the mpi. Scanning is done from the lsb to the msb, returned + * values are in the range of 0 .. 255. + * + * FIXME: This code is VERY ugly! + */ +/* int */ +/* _gcry_mpi_getbyte( gcry_mpi_t a, unsigned idx ) */ +/* { */ +/* int i, j; */ +/* unsigned n; */ +/* mpi_ptr_t ap; */ +/* mpi_limb_t limb; */ + +/* ap = a->d; */ +/* for(n=0,i=0; i < a->nlimbs; i++ ) { */ +/* limb = ap[i]; */ +/* for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) */ +/* if( n == idx ) */ +/* return (limb >> j*8) & 0xff; */ +/* } */ +/* return -1; */ +/* } */ + + +/**************** + * Put a value at position IDX into A. 
idx counts from lsb to msb + */ +/* void */ +/* _gcry_mpi_putbyte( gcry_mpi_t a, unsigned idx, int xc ) */ +/* { */ +/* int i, j; */ +/* unsigned n; */ +/* mpi_ptr_t ap; */ +/* mpi_limb_t limb, c; */ + +/* c = xc & 0xff; */ +/* ap = a->d; */ +/* for(n=0,i=0; i < a->alloced; i++ ) { */ +/* limb = ap[i]; */ +/* for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) */ +/* if( n == idx ) { */ +/* #if BYTES_PER_MPI_LIMB == 4 */ +/* if( j == 0 ) */ +/* limb = (limb & 0xffffff00) | c; */ +/* else if( j == 1 ) */ +/* limb = (limb & 0xffff00ff) | (c<<8); */ +/* else if( j == 2 ) */ +/* limb = (limb & 0xff00ffff) | (c<<16); */ +/* else */ +/* limb = (limb & 0x00ffffff) | (c<<24); */ +/* #elif BYTES_PER_MPI_LIMB == 8 */ +/* if( j == 0 ) */ +/* limb = (limb & 0xffffffffffffff00) | c; */ +/* else if( j == 1 ) */ +/* limb = (limb & 0xffffffffffff00ff) | (c<<8); */ +/* else if( j == 2 ) */ +/* limb = (limb & 0xffffffffff00ffff) | (c<<16); */ +/* else if( j == 3 ) */ +/* limb = (limb & 0xffffffff00ffffff) | (c<<24); */ +/* else if( j == 4 ) */ +/* limb = (limb & 0xffffff00ffffffff) | (c<<32); */ +/* else if( j == 5 ) */ +/* limb = (limb & 0xffff00ffffffffff) | (c<<40); */ +/* else if( j == 6 ) */ +/* limb = (limb & 0xff00ffffffffffff) | (c<<48); */ +/* else */ +/* limb = (limb & 0x00ffffffffffffff) | (c<<56); */ +/* #else */ +/* #error please enhance this function, its ugly - i know. 
*/ +/* #endif */ +/* if( a->nlimbs <= i ) */ +/* a->nlimbs = i+1; */ +/* ap[i] = limb; */ +/* return; */ +/* } */ +/* } */ +/* abort(); /\* index out of range *\/ */ +/* } */ + + +/**************** + * Count the number of zerobits at the low end of A + */ +unsigned +_gcry_mpi_trailing_zeros( gcry_mpi_t a ) +{ + unsigned n, count = 0; + + for(n=0; n < a->nlimbs; n++ ) { + if( a->d[n] ) { + unsigned nn; + mpi_limb_t alimb = a->d[n]; + + count_trailing_zeros( nn, alimb ); + count += nn; + break; + } + count += BITS_PER_MPI_LIMB; + } + return count; + +} diff --git a/comm/third_party/libgcrypt/mpi/mpicoder.c b/comm/third_party/libgcrypt/mpi/mpicoder.c new file mode 100644 index 0000000000..f61f777f8d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpicoder.c @@ -0,0 +1,958 @@ +/* mpicoder.c - Coder for the external representation of MPIs + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2008 Free Software Foundation, Inc. + * Copyright (C) 2013, 2014 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include "mpi-internal.h" +#include "g10lib.h" + +/* The maximum length we support in the functions converting an + * external representation to an MPI. 
This limit is used to catch + * programming errors and to avoid DoS due to insane long allocations. + * The 16 MiB limit is actually ridiculous large but some of those PQC + * algorithms use quite large keys and they might end up using MPIs + * for that. */ +#define MAX_EXTERN_SCAN_BYTES (16*1024*1024) + +/* The maximum length (in bits) we support for OpenPGP MPIs. Note + * that OpenPGP's MPI format uses only two bytes and thus would be + * limited to 64k anyway. Note that this limit matches that used by + * GnuPG. */ +#define MAX_EXTERN_MPI_BITS 16384 + + +/* Helper used to scan PGP style MPIs. Returns NULL on failure. */ +static gcry_mpi_t +mpi_read_from_buffer (const unsigned char *buffer, unsigned *ret_nread, + int secure) +{ + int i, j; + unsigned int nbits, nbytes, nlimbs, nread=0; + mpi_limb_t a; + gcry_mpi_t val = MPI_NULL; + + if ( *ret_nread < 2 ) + goto leave; + nbits = buffer[0] << 8 | buffer[1]; + if ( nbits > MAX_EXTERN_MPI_BITS ) + { +/* log_debug ("mpi too large (%u bits)\n", nbits); */ + goto leave; + } + buffer += 2; + nread = 2; + + nbytes = (nbits+7) / 8; + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + val = secure? mpi_alloc_secure (nlimbs) : mpi_alloc (nlimbs); + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + j= val->nlimbs = nlimbs; + val->sign = 0; + for ( ; j > 0; j-- ) + { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++ ) + { + if ( ++nread > *ret_nread ) + { +/* log_debug ("mpi larger than buffer"); */ + mpi_free (val); + val = NULL; + goto leave; + } + a <<= 8; + a |= *buffer++; + } + i = 0; + val->d[j-1] = a; + } + + leave: + *ret_nread = nread; + return val; +} + + +/**************** + * Fill the mpi VAL from the hex string in STR. 
+ */ +static int +mpi_fromstr (gcry_mpi_t val, const char *str) +{ + int sign = 0; + int prepend_zero = 0; + int i, j, c, c1, c2; + unsigned int nbits, nbytes, nlimbs; + mpi_limb_t a; + + if ( *str == '-' ) + { + sign = 1; + str++; + } + + /* Skip optional hex prefix. */ + if ( *str == '0' && str[1] == 'x' ) + str += 2; + + nbits = strlen (str); + if (nbits > MAX_EXTERN_SCAN_BYTES) + { + mpi_clear (val); + return 1; /* Error. */ + } + nbits *= 4; + if ((nbits % 8)) + prepend_zero = 1; + + nbytes = (nbits+7) / 8; + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + + if ( val->alloced < nlimbs ) + mpi_resize (val, nlimbs); + + i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB); + i %= BYTES_PER_MPI_LIMB; + j = val->nlimbs = nlimbs; + val->sign = sign; + for (; j > 0; j--) + { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) + { + if (prepend_zero) + { + c1 = '0'; + prepend_zero = 0; + } + else + c1 = *str++; + + if (!c1) + { + mpi_clear (val); + return 1; /* Error. */ + } + c2 = *str++; + if (!c2) + { + mpi_clear (val); + return 1; /* Error. */ + } + if ( c1 >= '0' && c1 <= '9' ) + c = c1 - '0'; + else if ( c1 >= 'a' && c1 <= 'f' ) + c = c1 - 'a' + 10; + else if ( c1 >= 'A' && c1 <= 'F' ) + c = c1 - 'A' + 10; + else + { + mpi_clear (val); + return 1; /* Error. */ + } + c <<= 4; + if ( c2 >= '0' && c2 <= '9' ) + c |= c2 - '0'; + else if( c2 >= 'a' && c2 <= 'f' ) + c |= c2 - 'a' + 10; + else if( c2 >= 'A' && c2 <= 'F' ) + c |= c2 - 'A' + 10; + else + { + mpi_clear(val); + return 1; /* Error. */ + } + a <<= 8; + a |= c; + } + i = 0; + val->d[j-1] = a; + } + + return 0; /* Okay. */ +} + + +/* Return an allocated buffer with the MPI (msb first). NBYTES + receives the length of this buffer. If FILL_LE is not 0, the + returned value is stored as little endian and right padded with + zeroes so that the returned buffer has at least FILL_LE bytes. 
+ + If EXTRAALLOC > 0 the returned buffer has these number of bytes + extra allocated at the end; if EXTRAALLOC < 0 the returned buffer + has the absolute value of EXTRAALLOC allocated at the begin of the + buffer (the are not initialized) and the MPI is stored right after + this. This feature is useful to allow the caller to prefix the + returned value. EXTRAALLOC is _not_ included in the value stored + at NBYTES. + + Caller must free the return string. This function returns an + allocated buffer with NBYTES set to zero if the value of A is zero. + If sign is not NULL, it will be set to the sign of the A. On error + NULL is returned and ERRNO set appropriately. */ +static unsigned char * +do_get_buffer (gcry_mpi_t a, unsigned int fill_le, int extraalloc, + unsigned int *nbytes, int *sign, int force_secure) +{ + unsigned char *p, *buffer, *retbuffer; + unsigned int length, tmp; + mpi_limb_t alimb; + int i; + size_t n, n2; + + if (sign) + *sign = a->sign; + + *nbytes = a->nlimbs * BYTES_PER_MPI_LIMB; + n = *nbytes? *nbytes:1; /* Allocate at least one byte. */ + if (n < fill_le) + n = fill_le; + if (extraalloc < 0) + n2 = n + -extraalloc; + else + n2 = n + extraalloc; + + retbuffer = (force_secure || mpi_is_secure(a))? xtrymalloc_secure (n2) + : xtrymalloc (n2); + if (!retbuffer) + return NULL; + if (extraalloc < 0) + buffer = retbuffer + -extraalloc; + else + buffer = retbuffer; + p = buffer; + + for (i=a->nlimbs-1; i >= 0; i--) + { + alimb = a->d[i]; +#if BYTES_PER_MPI_LIMB == 4 + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb ; +#elif BYTES_PER_MPI_LIMB == 8 + *p++ = alimb >> 56; + *p++ = alimb >> 48; + *p++ = alimb >> 40; + *p++ = alimb >> 32; + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb ; +#else +# error please implement for this limb size. +#endif + } + + if (fill_le) + { + length = *nbytes; + /* Reverse buffer and pad with zeroes. 
*/ + for (i=0; i < length/2; i++) + { + tmp = buffer[i]; + buffer[i] = buffer[length-1-i]; + buffer[length-1-i] = tmp; + } + /* Pad with zeroes. */ + for (p = buffer + length; length < fill_le; length++) + *p++ = 0; + *nbytes = length; + + return retbuffer; + } + + /* This is sub-optimal but we need to do the shift operation because + the caller has to free the returned buffer. */ + for (p=buffer; *nbytes && !*p; p++, --*nbytes) + ; + if (p != buffer) + memmove (buffer, p, *nbytes); + return retbuffer; +} + + +byte * +_gcry_mpi_get_buffer (gcry_mpi_t a, unsigned int fill_le, + unsigned int *r_nbytes, int *sign) +{ + if (mpi_get_flag (a, GCRYMPI_FLAG_OPAQUE)) + { + unsigned int nbits; + byte *p = _gcry_mpi_get_opaque_copy (a, &nbits); + + if (r_nbytes) + *r_nbytes = (nbits+7)/8; + + return p; + } + else + return do_get_buffer (a, fill_le, 0, r_nbytes, sign, 0); +} + +byte * +_gcry_mpi_get_buffer_extra (gcry_mpi_t a, unsigned int fill_le, int extraalloc, + unsigned int *r_nbytes, int *sign) +{ + return do_get_buffer (a, fill_le, extraalloc, r_nbytes, sign, 0); +} + +byte * +_gcry_mpi_get_secure_buffer (gcry_mpi_t a, unsigned int fill_le, + unsigned int *r_nbytes, int *sign) +{ + return do_get_buffer (a, fill_le, 0, r_nbytes, sign, 1); +} + + +/* + * Use the NBYTES at BUFFER_ARG to update A. Set the sign of a to + * SIGN. 
+ */ +void +_gcry_mpi_set_buffer (gcry_mpi_t a, const void *buffer_arg, + unsigned int nbytes, int sign) +{ + const unsigned char *buffer = (const unsigned char*)buffer_arg; + const unsigned char *p; + mpi_limb_t alimb; + int nlimbs; + int i; + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + + nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + RESIZE_IF_NEEDED(a, nlimbs); + a->sign = sign; + + for (i=0, p = buffer+nbytes-1; p >= buffer+BYTES_PER_MPI_LIMB; ) + { +#if BYTES_PER_MPI_LIMB == 4 + alimb = (mpi_limb_t)*p-- ; + alimb |= (mpi_limb_t)*p-- << 8 ; + alimb |= (mpi_limb_t)*p-- << 16 ; + alimb |= (mpi_limb_t)*p-- << 24 ; +#elif BYTES_PER_MPI_LIMB == 8 + alimb = (mpi_limb_t)*p-- ; + alimb |= (mpi_limb_t)*p-- << 8 ; + alimb |= (mpi_limb_t)*p-- << 16 ; + alimb |= (mpi_limb_t)*p-- << 24 ; + alimb |= (mpi_limb_t)*p-- << 32 ; + alimb |= (mpi_limb_t)*p-- << 40 ; + alimb |= (mpi_limb_t)*p-- << 48 ; + alimb |= (mpi_limb_t)*p-- << 56 ; +#else +# error please implement for this limb size. +#endif + a->d[i++] = alimb; + } + if ( p >= buffer ) + { +#if BYTES_PER_MPI_LIMB == 4 + alimb = (mpi_limb_t)*p--; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 8; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 16; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 + alimb = (mpi_limb_t)*p--; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 8; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 16; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 24; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 32; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 40; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 48; + if (p >= buffer) + alimb |= (mpi_limb_t)*p-- << 56; +#else +# error please implement for this limb size. 
/* Perform a two's complement operation on buffer P of size N bytes.
 *
 * P is treated as a big-endian number: P[N-1] is the least significant
 * byte.  Negating such a number leaves all trailing zero bytes
 * untouched, replaces the least significant non-zero byte X by
 * (0x100 - X) and inverts every more significant byte.  A buffer that
 * is all zero (or N == 0) is left unchanged.
 *
 * The index is kept unsigned so that no length can be narrowed into a
 * negative int.  */
static void
twocompl (unsigned char *p, unsigned int n)
{
  unsigned int pos = n;

  /* Skip the least significant zero bytes; they stay zero.  */
  while (pos > 0 && !p[pos - 1])
    pos--;
  if (!pos)
    return;  /* Nothing but zeroes: -0 == 0.  */

  /* Negate the lowest non-zero byte.  This is exactly "invert all bits
     above the lowest set bit", i.e. (~X + 1) reduced to 8 bits.  */
  pos--;
  p[pos] = (unsigned char)(0u - p[pos]);

  /* No carry can propagate further, so the remaining more significant
     bytes are simply inverted.  */
  while (pos > 0)
    {
      pos--;
      p[pos] ^= 0xff;
    }
}
*/ +gcry_err_code_t +_gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum gcry_mpi_format format, + const void *buffer_arg, size_t buflen, size_t *nscanned) +{ + const unsigned char *buffer = (const unsigned char*)buffer_arg; + struct gcry_mpi *a = NULL; + unsigned int len; + int secure = (buffer && _gcry_is_secure (buffer)); + + if (buflen > MAX_EXTERN_SCAN_BYTES) + { + if (nscanned) + *nscanned = 0; + return GPG_ERR_INV_OBJ; + } + + if (format == GCRYMPI_FMT_SSH) + len = 0; + else + len = buflen; + + if (format == GCRYMPI_FMT_STD) + { + const unsigned char *s = buffer; + + a = secure? mpi_alloc_secure ((len+BYTES_PER_MPI_LIMB-1) + /BYTES_PER_MPI_LIMB) + : mpi_alloc ((len+BYTES_PER_MPI_LIMB-1)/BYTES_PER_MPI_LIMB); + if (len) + { + _gcry_mpi_set_buffer (a, s, len, 0); + a->sign = !!(*s & 0x80); + if (a->sign) + { + onecompl (a); + mpi_add_ui (a, a, 1); + a->sign = 1; + } + } + if (ret_mpi) + { + mpi_normalize ( a ); + *ret_mpi = a; + } + else + mpi_free(a); + if (nscanned) + *nscanned = len; + return 0; + } + else if (format == GCRYMPI_FMT_USG) + { + a = secure? mpi_alloc_secure ((len+BYTES_PER_MPI_LIMB-1) + /BYTES_PER_MPI_LIMB) + : mpi_alloc ((len+BYTES_PER_MPI_LIMB-1)/BYTES_PER_MPI_LIMB); + + if (len) + _gcry_mpi_set_buffer (a, buffer, len, 0); + if (ret_mpi) + { + mpi_normalize ( a ); + *ret_mpi = a; + } + else + mpi_free(a); + if (nscanned) + *nscanned = len; + return 0; + } + else if (format == GCRYMPI_FMT_PGP) + { + a = mpi_read_from_buffer (buffer, &len, secure); + if (nscanned) + *nscanned = len; + if (ret_mpi && a) + { + mpi_normalize (a); + *ret_mpi = a; + } + else if (a) + { + mpi_free(a); + a = NULL; + } + return a? 0 : GPG_ERR_INV_OBJ; + } + else if (format == GCRYMPI_FMT_SSH) + { + const unsigned char *s = buffer; + size_t n; + + /* This test is not strictly necessary and an assert (!len) + would be sufficient. We keep this test in case we later + allow the BUFLEN argument to act as a sanitiy check. Same + below. 
*/ + if (len && len < 4) + return GPG_ERR_TOO_SHORT; + + n = (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]); + s += 4; + if (len) + len -= 4; + if (len && n > len) + return GPG_ERR_TOO_LARGE; + + a = secure? mpi_alloc_secure ((n+BYTES_PER_MPI_LIMB-1) + /BYTES_PER_MPI_LIMB) + : mpi_alloc ((n+BYTES_PER_MPI_LIMB-1)/BYTES_PER_MPI_LIMB); + if (n) + { + _gcry_mpi_set_buffer( a, s, n, 0 ); + a->sign = !!(*s & 0x80); + if (a->sign) + { + onecompl (a); + mpi_add_ui (a, a, 1); + a->sign = 1; + } + } + if (nscanned) + *nscanned = n+4; + if (ret_mpi) + { + mpi_normalize ( a ); + *ret_mpi = a; + } + else + mpi_free(a); + return 0; + } + else if (format == GCRYMPI_FMT_HEX) + { + /* We can only handle C strings for now. */ + if (buflen) + return GPG_ERR_INV_ARG; + + a = secure? mpi_alloc_secure (0) : mpi_alloc(0); + if (mpi_fromstr (a, (const char *)buffer)) + { + mpi_free (a); + return GPG_ERR_INV_OBJ; + } + if (ret_mpi) + { + mpi_normalize ( a ); + *ret_mpi = a; + } + else + mpi_free(a); + if (nscanned) + *nscanned = strlen ((const char*)buffer); + return 0; + } + else + return GPG_ERR_INV_ARG; +} + + +/* Convert the big integer A into the external representation + described by FORMAT and store it in the provided BUFFER which has + been allocated by the user with a size of BUFLEN bytes. NWRITTEN + receives the actual length of the external representation unless it + has been passed as NULL. BUFFER may be NULL to query the required + length. */ +gcry_err_code_t +_gcry_mpi_print (enum gcry_mpi_format format, + unsigned char *buffer, size_t buflen, + size_t *nwritten, struct gcry_mpi *a) +{ + unsigned int nbits = mpi_get_nbits (a); + size_t len; + size_t dummy_nwritten; + int negative; + + if (!nwritten) + nwritten = &dummy_nwritten; + + /* Libgcrypt does no always care to set clear the sign if the value + is 0. For printing this is a bit of a surprise, in particular + because if some of the formats don't support negative numbers but + should be able to print a zero. 
Thus we need this extra test + for a negative number. */ + if (a->sign && _gcry_mpi_cmp_ui (a, 0)) + negative = 1; + else + negative = 0; + + len = buflen; + *nwritten = 0; + if (format == GCRYMPI_FMT_STD) + { + unsigned char *tmp; + int extra = 0; + unsigned int n; + + tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL); + if (!tmp) + return gpg_err_code_from_syserror (); + + if (negative) + { + twocompl (tmp, n); + if (!(*tmp & 0x80)) + { + /* Need to extend the sign. */ + n++; + extra = 2; + } + } + else if (n && (*tmp & 0x80)) + { + /* Positive but the high bit of the returned buffer is set. + Thus we need to print an extra leading 0x00 so that the + output is interpreted as a positive number. */ + n++; + extra = 1; + } + + if (buffer && n > len) + { + /* The provided buffer is too short. */ + xfree (tmp); + return GPG_ERR_TOO_SHORT; + } + if (buffer) + { + unsigned char *s = buffer; + + if (extra == 1) + *s++ = 0; + else if (extra) + *s++ = 0xff; + memcpy (s, tmp, n-!!extra); + } + xfree (tmp); + *nwritten = n; + return 0; + } + else if (format == GCRYMPI_FMT_USG) + { + unsigned int n = (nbits + 7)/8; + + /* Note: We ignore the sign for this format. */ + /* FIXME: for performance reasons we should put this into + mpi_aprint because we can then use the buffer directly. */ + + if (buffer && n > len) + return GPG_ERR_TOO_SHORT; + if (buffer) + { + unsigned char *tmp; + + tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL); + if (!tmp) + return gpg_err_code_from_syserror (); + memcpy (buffer, tmp, n); + xfree (tmp); + } + *nwritten = n; + return 0; + } + else if (format == GCRYMPI_FMT_PGP) + { + unsigned int n = (nbits + 7)/8; + + /* The PGP format can only handle unsigned integers. 
*/ + if (negative) + return GPG_ERR_INV_ARG; + + if (buffer && n+2 > len) + return GPG_ERR_TOO_SHORT; + + if (buffer) + { + unsigned char *tmp; + unsigned char *s = buffer; + + s[0] = nbits >> 8; + s[1] = nbits; + + tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL); + if (!tmp) + return gpg_err_code_from_syserror (); + memcpy (s+2, tmp, n); + xfree (tmp); + } + *nwritten = n+2; + return 0; + } + else if (format == GCRYMPI_FMT_SSH) + { + unsigned char *tmp; + int extra = 0; + unsigned int n; + + tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL); + if (!tmp) + return gpg_err_code_from_syserror (); + + if (negative) + { + twocompl (tmp, n); + if (!(*tmp & 0x80)) + { + /* Need to extend the sign. */ + n++; + extra = 2; + } + } + else if (n && (*tmp & 0x80)) + { + n++; + extra=1; + } + + if (buffer && n+4 > len) + { + xfree(tmp); + return GPG_ERR_TOO_SHORT; + } + + if (buffer) + { + unsigned char *s = buffer; + + *s++ = n >> 24; + *s++ = n >> 16; + *s++ = n >> 8; + *s++ = n; + if (extra == 1) + *s++ = 0; + else if (extra) + *s++ = 0xff; + memcpy (s, tmp, n-!!extra); + } + xfree (tmp); + *nwritten = 4+n; + return 0; + } + else if (format == GCRYMPI_FMT_HEX) + { + unsigned char *tmp; + int i; + int extra = 0; + unsigned int n = 0; + + tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL); + if (!tmp) + return gpg_err_code_from_syserror (); + if (!n || (*tmp & 0x80)) + extra = 2; + + if (buffer && 2*n + extra + negative + 1 > len) + { + xfree(tmp); + return GPG_ERR_TOO_SHORT; + } + if (buffer) + { + unsigned char *s = buffer; + + if (negative) + *s++ = '-'; + if (extra) + { + *s++ = '0'; + *s++ = '0'; + } + + for (i=0; i < n; i++) + { + unsigned int c = tmp[i]; + + *s++ = (c >> 4) < 10? '0'+(c>>4) : 'A'+(c>>4)-10 ; + c &= 15; + *s++ = c < 10? 
'0'+c : 'A'+c-10 ; + } + *s++ = 0; + *nwritten = s - buffer; + } + else + { + *nwritten = 2*n + extra + negative + 1; + } + xfree (tmp); + return 0; + } + else + return GPG_ERR_INV_ARG; +} + + +/* + * Like gcry_mpi_print but this function allocates the buffer itself. + * The caller has to supply the address of a pointer. NWRITTEN may be + * NULL. + */ +gcry_err_code_t +_gcry_mpi_aprint (enum gcry_mpi_format format, + unsigned char **buffer, size_t *nwritten, + struct gcry_mpi *a) +{ + size_t n; + gcry_err_code_t rc; + + *buffer = NULL; + rc = _gcry_mpi_print (format, NULL, 0, &n, a); + if (rc) + return rc; + + *buffer = mpi_is_secure(a) ? xtrymalloc_secure (n?n:1) : xtrymalloc (n?n:1); + if (!*buffer) + return gpg_err_code_from_syserror (); + /* If the returned buffer will have a length of 0, we nevertheless + allocated 1 byte (malloc needs it anyway) and store a 0. */ + if (!n) + **buffer = 0; + rc = _gcry_mpi_print( format, *buffer, n, &n, a ); + if (rc) + { + xfree (*buffer); + *buffer = NULL; + } + else if (nwritten) + *nwritten = n; + return rc; +} + + +/* Turn VALUE into an octet string and store it in an allocated buffer + at R_FRAME or - if R_RAME is NULL - copy it into the caller + provided buffer SPACE; either SPACE or R_FRAME may be used. If + SPACE if not NULL, the caller must provide a buffer of at least + NBYTES. If the resulting octet string is shorter than NBYTES pad + it to the left with zeroes. If VALUE does not fit into NBYTES + return an error code. */ +gpg_err_code_t +_gcry_mpi_to_octet_string (unsigned char **r_frame, void *space, + gcry_mpi_t value, size_t nbytes) +{ + gpg_err_code_t rc; + size_t nframe, noff, n; + unsigned char *frame; + + if (!r_frame == !space) + return GPG_ERR_INV_ARG; /* Only one may be used. */ + + if (r_frame) + *r_frame = NULL; + + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, NULL, 0, &nframe, value); + if (rc) + return rc; + if (nframe > nbytes) + return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES. 
*/ + + noff = (nframe < nbytes)? nbytes - nframe : 0; + n = nframe + noff; + if (space) + frame = space; + else + { + frame = mpi_is_secure (value)? xtrymalloc_secure (n) : xtrymalloc (n); + if (!frame) + { + rc = gpg_err_code_from_syserror (); + return rc; + } + } + if (noff) + memset (frame, 0, noff); + nframe += noff; + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, frame+noff, nframe-noff, NULL, value); + if (rc) + { + xfree (frame); + return rc; + } + + if (r_frame) + *r_frame = frame; + return 0; +} diff --git a/comm/third_party/libgcrypt/mpi/mpih-const-time.c b/comm/third_party/libgcrypt/mpi/mpih-const-time.c new file mode 100644 index 0000000000..968995059c --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpih-const-time.c @@ -0,0 +1,197 @@ +/* mpih-const-time.c - Constant-time MPI helper functions + * Copyright (C) 2020 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
/*
 *  W = U when OP_ENABLED=1
 *  otherwise, W keeps old value
 *
 * WP and UP both hold USIZE limbs.  MASK is computed as 0 - OP_ENABLE,
 * i.e. all ones for OP_ENABLE == 1 and zero for OP_ENABLE == 0, and the
 * selection is done per limb with XOR/AND instead of a branch on
 * OP_ENABLE.
 * NOTE(review): values of OP_ENABLE other than 0 or 1 give a partial
 * mask; callers presumably pass 0 or 1 only.
 */
void
_gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
                     unsigned long op_enable)
{
  mpi_size_t i;
  mpi_limb_t mask = ((mpi_limb_t)0) - op_enable;
  mpi_limb_t x;

  for (i = 0; i < usize; i++)
    {
      /* X is the bit difference between W and U, or 0 if disabled.  */
      x = mask & (wp[i] ^ up[i]);
      wp[i] = wp[i] ^ x;
    }
}


/*
 *  W = U + V when OP_ENABLED=1
 *  otherwise, W = U
 *
 * All three operands hold USIZE limbs.  V is ANDed with MASK before
 * the addition, so a disabled operation adds zero.  Returns the carry
 * out of the most significant limb.
 */
mpi_limb_t
_gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
                       mpi_size_t usize, unsigned long op_enable)
{
  mpi_size_t i;
  mpi_limb_t cy;
  mpi_limb_t mask = ((mpi_limb_t)0) - op_enable;

  cy = 0;
  for (i = 0; i < usize; i++)
    {
      mpi_limb_t x = up[i] + (vp[i] & mask);
      mpi_limb_t cy1 = x < up[i];  /* Wrap-around of U + V.  */
      mpi_limb_t cy2;

      x = x + cy;
      cy2 = x < cy;                /* Wrap-around of adding the carry.  */
      cy = cy1 | cy2;
      wp[i] = x;
    }

  return cy;
}


/*
 *  W = U - V when OP_ENABLED=1
 *  otherwise, W = U
 *
 * All three operands hold USIZE limbs.  V is ANDed with MASK before
 * the subtraction, so a disabled operation subtracts zero.  Returns
 * the borrow out of the most significant limb.
 */
mpi_limb_t
_gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
                       mpi_size_t usize, unsigned long op_enable)
{
  mpi_size_t i;
  mpi_limb_t cy;
  mpi_limb_t mask = ((mpi_limb_t)0) - op_enable;

  cy = 0;
  for (i = 0; i < usize; i++)
    {
      mpi_limb_t x = up[i] - (vp[i] & mask);
      mpi_limb_t cy1 = x > up[i];  /* Wrap-around of U - V.  */
      mpi_limb_t cy2;

      /* The borrow test must use X before the borrow is subtracted.  */
      cy2 = x < cy;
      x = x - cy;
      cy = cy1 | cy2;
      wp[i] = x;
    }

  return cy;
}


/*
 *  Swap value of U and V when OP_ENABLED=1
 *  otherwise, no change
 *
 * Both operands hold USIZE limbs.  X is the masked bit difference of
 * the two limbs; XORing it into both limbs exchanges them, and with a
 * zero mask both limbs are written back unchanged.
 */
void
_gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize,
                      unsigned long op_enable)
{
  mpi_size_t i;
  mpi_limb_t mask = ((mpi_limb_t)0) - op_enable;

  for (i = 0; i < usize; i++)
    {
      mpi_limb_t x = mask & (up[i] ^ vp[i]);

      up[i] = up[i] ^ x;
      vp[i] = vp[i] ^ x;
    }
}
mpi_size_t usize, + unsigned long op_enable) +{ + mpi_size_t i; + mpi_limb_t mask = ((mpi_limb_t)0) - op_enable; + mpi_limb_t cy = op_enable; + + for (i = 0; i < usize; i++) + { + mpi_limb_t x = ~up[i] + cy; + + cy = (x < ~up[i]); + wp[i] = up[i] ^ (mask & (x ^ up[i])); + } +} + + +/* + * Allocating memory for W, + * compute W = V % U, then return W + */ +mpi_ptr_t +_gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize, + mpi_ptr_t up, mpi_size_t usize) +{ + int secure; + mpi_ptr_t rp; + mpi_size_t i; + + secure = _gcry_is_secure (vp); + rp = mpi_alloc_limb_space (usize, secure); + MPN_ZERO (rp, usize); + + for (i = 0; i < vsize * BITS_PER_MPI_LIMB; i++) + { + unsigned int j = vsize * BITS_PER_MPI_LIMB - 1 - i; + unsigned int limbno = j / BITS_PER_MPI_LIMB; + unsigned int bitno = j % BITS_PER_MPI_LIMB; + mpi_limb_t limb = vp[limbno]; + unsigned int the_bit = ((limb & (A_LIMB_1 << bitno)) ? 1 : 0); + mpi_limb_t underflow; + mpi_limb_t overflow; + + overflow = _gcry_mpih_lshift (rp, rp, usize, 1); + rp[0] |= the_bit; + + underflow = _gcry_mpih_sub_n (rp, rp, up, usize); + mpih_add_n_cond (rp, rp, up, usize, overflow ^ underflow); + } + + return rp; +} + +int +_gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v) +{ + int is_all_zero = 1; + mpi_size_t i; + + for (i = 1; i < usize; i++) + is_all_zero &= (up[i] == 0); + + if (is_all_zero) + return up[0] - v; + return 1; +} diff --git a/comm/third_party/libgcrypt/mpi/mpih-div.c b/comm/third_party/libgcrypt/mpi/mpih-div.c new file mode 100644 index 0000000000..57c1b58487 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpih-div.c @@ -0,0 +1,532 @@ +/* mpih-div.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 2000, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
/* FIXME: We should be using invert_limb (or invert_normalized_limb)
 * here (not udiv_qrnnd).
 */

/* Return the remainder of dividing the DIVIDEND_SIZE limb number at
 * DIVIDEND_PTR by the single limb DIVISOR_LIMB.  The dividend is only
 * read.  A DIVIDEND_SIZE of 0 yields 0.
 *
 * Four code paths exist; which one is taken depends on the compile
 * time cost constants UDIV_TIME/UMUL_TIME, on UDIV_NEEDS_NORMALIZATION
 * and on whether DIVISOR_LIMB has leading zero bits:
 *   - pre-inverted divisor, with or without normalization shift
 *     (UDIV_QRNND_PREINV),
 *   - plain udiv_qrnnd, with or without normalization shift.
 * In the normalizing paths the divisor and the dividend are shifted
 * left by NORMALIZATION_STEPS on the fly and the final remainder is
 * shifted back.
 */
mpi_limb_t
_gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
                 mpi_limb_t divisor_limb)
{
    mpi_size_t i;
    mpi_limb_t n1, n0, r;
    mpi_limb_t dummy GCC_ATTR_UNUSED;  /* Receives unneeded quotients.  */

    /* Botch: Should this be handled at all?  Rely on callers?  */
    if( !dividend_size )
        return 0;

    /* If multiplication is much faster than division, and the
     * dividend is large, pre-invert the divisor, and use
     * only multiplications in the inner loop.
     *
     * This test should be read:
     *   Does it ever help to use udiv_qrnnd_preinv?
     *     && Does what we save compensate for the inversion overhead?
     */
    if( UDIV_TIME > (2 * UMUL_TIME + 6)
        && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
        int normalization_steps;

        count_leading_zeros( normalization_steps, divisor_limb );
        if( normalization_steps ) {
            mpi_limb_t divisor_limb_inverted;

            divisor_limb <<= normalization_steps;

            /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
             * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
             * most significant bit (with weight 2**N) implicit.
             *
             * Special case for DIVISOR_LIMB == 100...000.
             */
            if( !(divisor_limb << 1) )
                divisor_limb_inverted = ~(mpi_limb_t)0;
            else
                udiv_qrnnd(divisor_limb_inverted, dummy,
                           -divisor_limb, 0, divisor_limb);

            /* The bits shifted out of the top limb start the remainder.  */
            n1 = dividend_ptr[dividend_size - 1];
            r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

            /* Possible optimization:
             * if (r == 0
             * && divisor_limb > ((n1 << normalization_steps)
             *                 | (dividend_ptr[dividend_size - 2] >> ...)))
             * ...one division less...
             */
            for( i = dividend_size - 2; i >= 0; i--) {
                n0 = dividend_ptr[i];
                UDIV_QRNND_PREINV(dummy, r, r,
                                  ((n1 << normalization_steps)
                          | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
                                  divisor_limb, divisor_limb_inverted);
                n1 = n0;
            }
            /* Lowest limb: nothing left to shift in from below.  */
            UDIV_QRNND_PREINV(dummy, r, r,
                              n1 << normalization_steps,
                              divisor_limb, divisor_limb_inverted);
            return r >> normalization_steps;
        }
        else {
            mpi_limb_t divisor_limb_inverted;

            /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
             * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
             * most significant bit (with weight 2**N) implicit.
             *
             * Special case for DIVISOR_LIMB == 100...000.
             */
            if( !(divisor_limb << 1) )
                divisor_limb_inverted = ~(mpi_limb_t)0;
            else
                udiv_qrnnd(divisor_limb_inverted, dummy,
                           -divisor_limb, 0, divisor_limb);

            i = dividend_size - 1;
            r = dividend_ptr[i];

            /* If the top limb is already smaller than the divisor it is
             * used directly as the initial remainder and skipped.  */
            if( r >= divisor_limb )
                r = 0;
            else
                i--;

            for( ; i >= 0; i--) {
                n0 = dividend_ptr[i];
                UDIV_QRNND_PREINV(dummy, r, r,
                                  n0, divisor_limb, divisor_limb_inverted);
            }
            return r;
        }
    }
    else {
        if( UDIV_NEEDS_NORMALIZATION ) {
            int normalization_steps;

            count_leading_zeros(normalization_steps, divisor_limb);
            if( normalization_steps ) {
                divisor_limb <<= normalization_steps;

                n1 = dividend_ptr[dividend_size - 1];
                r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

                /* Possible optimization:
                 * if (r == 0
                 * && divisor_limb > ((n1 << normalization_steps)
                 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
                 * ...one division less...
                 */
                for(i = dividend_size - 2; i >= 0; i--) {
                    n0 = dividend_ptr[i];
                    udiv_qrnnd (dummy, r, r,
                                ((n1 << normalization_steps)
                         | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
                                divisor_limb);
                    n1 = n0;
                }
                udiv_qrnnd (dummy, r, r,
                            n1 << normalization_steps,
                            divisor_limb);
                return r >> normalization_steps;
            }
        }
        /* No normalization needed, either because udiv_qrnnd doesn't require
         * it, or because DIVISOR_LIMB is already normalized.  */
        i = dividend_size - 1;
        r = dividend_ptr[i];

        /* Same shortcut as above for a small top limb.  */
        if(r >= divisor_limb)
            r = 0;
        else
            i--;

        for(; i >= 0; i--) {
            n0 = dividend_ptr[i];
            udiv_qrnnd (dummy, r, r, n0, divisor_limb);
        }
        return r;
    }
}
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
 * the NSIZE-DSIZE least significant quotient limbs at QP
 * and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
 * non-zero, generate that many fraction bits and append them after the
 * other quotient limbs.
 * Return the most significant limb of the quotient, this is always 0 or 1.
 *
 * Preconditions:
 * 0. NSIZE >= DSIZE.
 * 1. The most significant bit of the divisor must be set.
 * 2. QP must either not overlap with the input operands at all, or
 *    QP + DSIZE >= NP must hold true.  (This means that it's
 *    possible to put the quotient in the high part of NUM, right after the
 *    remainder in NUM.)
 * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
 *
 * The implementation dispatches on DSIZE: 0 reports a division by
 * zero, 1 and 2 are handled by dedicated limb code, and all larger
 * sizes use the general loop with a quotient limb estimate that is
 * corrected afterwards.  In every case the quotient limbs are
 * produced from the most significant one downwards.
 */

mpi_limb_t
_gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
                   mpi_ptr_t np, mpi_size_t nsize,
                   mpi_ptr_t dp, mpi_size_t dsize)
{
    mpi_limb_t most_significant_q_limb = 0;

    switch(dsize) {
      case 0:
        _gcry_divide_by_zero();
        break;

      case 1:
        {
            mpi_size_t i;
            mpi_limb_t n1;
            mpi_limb_t d;

            d = dp[0];
            n1 = np[nsize - 1];

            /* Reduce the top limb below D; this is the extra
             * quotient limb returned to the caller.  */
            if( n1 >= d ) {
                n1 -= d;
                most_significant_q_limb = 1;
            }

            /* Integer part of the quotient, stored above the
             * QEXTRA_LIMBS fraction limbs.  */
            qp += qextra_limbs;
            for( i = nsize - 2; i >= 0; i--)
                udiv_qrnnd( qp[i], n1, n1, np[i], d );
            qp -= qextra_limbs;

            /* Fraction limbs: keep dividing with zero limbs shifted in.  */
            for( i = qextra_limbs - 1; i >= 0; i-- )
                udiv_qrnnd (qp[i], n1, n1, 0, d);

            np[0] = n1;
        }
        break;

      case 2:
        {
            mpi_size_t i;
            mpi_limb_t n1, n0, n2;
            mpi_limb_t d1, d0;

            /* NP now points at the two most significant limbs.  */
            np += nsize - 2;
            d1 = dp[1];
            d0 = dp[0];
            n1 = np[1];
            n0 = np[0];

            if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) {
                sub_ddmmss (n1, n0, n1, n0, d1, d0);
                most_significant_q_limb = 1;
            }

            for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) {
                mpi_limb_t q;
                mpi_limb_t r;

                /* Step down to the next dividend limb, or feed a zero
                 * limb while fraction limbs are generated.  */
                if( i >= qextra_limbs )
                    np--;
                else
                    np[0] = 0;

                if( n1 == d1 ) {
                    /* Q should be either 111..111 or 111..110.  Need special
                     * treatment of this rare case as normal division would
                     * give overflow.  */
                    q = ~(mpi_limb_t)0;

                    r = n0 + d1;
                    if( r < d1 ) {   /* Carry in the addition? */
                        add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 );
                        qp[i] = q;
                        continue;
                    }
                    n1 = d0 - (d0 != 0?1:0);
                    n0 = -d0;
                }
                else {
                    udiv_qrnnd (q, r, n1, n0, d1);
                    umul_ppmm (n1, n0, d0, q);
                }

                n2 = np[0];
              q_test:
                if( n1 > r || (n1 == r && n0 > n2) ) {
                    /* The estimated Q was too large.  */
                    q--;
                    sub_ddmmss (n1, n0, n1, n0, 0, d0);
                    r += d1;
                    if( r >= d1 )    /* If not carry, test Q again.  */
                        goto q_test;
                }

                qp[i] = q;
                sub_ddmmss (n1, n0, r, n2, n1, n0);
            }
            /* Store the two limb remainder.  */
            np[1] = n1;
            np[0] = n0;
        }
        break;

      default:
        {
            mpi_size_t i;
            mpi_limb_t dX, d1, n0;

            /* NP now points at the DSIZE most significant limbs.  */
            np += nsize - dsize;
            dX = dp[dsize - 1];
            d1 = dp[dsize - 2];
            n0 = np[dsize - 1];

            if( n0 >= dX ) {
                if(n0 > dX || _gcry_mpih_cmp(np, dp, dsize - 1) >= 0 ) {
                    _gcry_mpih_sub_n(np, np, dp, dsize);
                    n0 = np[dsize - 1];
                    most_significant_q_limb = 1;
                }
            }

            for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
                mpi_limb_t q;
                mpi_limb_t n1, n2;
                mpi_limb_t cy_limb;

                if( i >= qextra_limbs ) {
                    np--;
                    n2 = np[dsize];
                }
                else {
                    /* Fraction limb: shift the remainder up by one limb
                     * and bring in a zero limb at the bottom.  */
                    n2 = np[dsize - 1];
                    MPN_COPY_DECR (np + 1, np, dsize - 1);
                    np[0] = 0;
                }

                if( n0 == dX ) {
                    /* This might over-estimate q, but it's probably not worth
                     * the extra code here to find out.  */
                    q = ~(mpi_limb_t)0;
                }
                else {
                    mpi_limb_t r;

                    /* Estimate Q from the two top limbs and refine it
                     * using the second divisor limb D1.  */
                    udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
                    umul_ppmm(n1, n0, d1, q);

                    while( n1 > r || (n1 == r && n0 > np[dsize - 2])) {
                        q--;
                        r += dX;
                        if( r < dX ) /* I.e. "carry in previous addition?" */
                            break;
                        n1 -= n0 < d1;
                        n0 -= d1;
                    }
                }

                /* Possible optimization: We already have (q * n0) and (1 * n1)
                 * after the calculation of q.  Taking advantage of that, we
                 * could make this loop make two iterations less.  */
                cy_limb = _gcry_mpih_submul_1(np, dp, dsize, q);

                /* A borrow that differs from the limb above means Q was
                 * one too large: add the divisor back.  */
                if( n2 != cy_limb ) {
                    _gcry_mpih_add_n(np, np, dp, dsize);
                    q--;
                }

                qp[i] = q;
                n0 = np[dsize - 1];
            }
        }
    }

    return most_significant_q_limb;
}
/****************
 * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
 * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
 * Return the single-limb remainder.
 * There are no constraints on the value of the divisor.
 *
 * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
 *
 * A DIVIDEND_SIZE of 0 returns 0 without writing any quotient limb.
 * The same four code paths as in the remainder-only routine are used
 * (pre-inverted divisor or plain udiv_qrnnd, each with or without a
 * normalization shift); the difference is that every division step
 * stores its quotient limb.
 */

mpi_limb_t
_gcry_mpih_divmod_1( mpi_ptr_t quot_ptr,
                     mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
                     mpi_limb_t divisor_limb)
{
    mpi_size_t i;
    mpi_limb_t n1, n0, r;
    mpi_limb_t dummy GCC_ATTR_UNUSED;  /* Receives unneeded remainders.  */

    if( !dividend_size )
        return 0;

    /* If multiplication is much faster than division, and the
     * dividend is large, pre-invert the divisor, and use
     * only multiplications in the inner loop.
     *
     * This test should be read:
     *   Does it ever help to use udiv_qrnnd_preinv?
     *     && Does what we save compensate for the inversion overhead?
     */
    if( UDIV_TIME > (2 * UMUL_TIME + 6)
        && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
        int normalization_steps;

        count_leading_zeros( normalization_steps, divisor_limb );
        if( normalization_steps ) {
            mpi_limb_t divisor_limb_inverted;

            divisor_limb <<= normalization_steps;

            /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
             * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
             * most significant bit (with weight 2**N) implicit.
             */
            /* Special case for DIVISOR_LIMB == 100...000.  */
            if( !(divisor_limb << 1) )
                divisor_limb_inverted = ~(mpi_limb_t)0;
            else
                udiv_qrnnd(divisor_limb_inverted, dummy,
                           -divisor_limb, 0, divisor_limb);

            n1 = dividend_ptr[dividend_size - 1];
            r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

            /* Possible optimization:
             * if (r == 0
             * && divisor_limb > ((n1 << normalization_steps)
             *                 | (dividend_ptr[dividend_size - 2] >> ...)))
             * ...one division less...
             */
            /* Because of the shift, the step that reads limb I
             * produces quotient limb I + 1.  */
            for( i = dividend_size - 2; i >= 0; i--) {
                n0 = dividend_ptr[i];
                UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r,
                                   ((n1 << normalization_steps)
                         | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
                                   divisor_limb, divisor_limb_inverted);
                n1 = n0;
            }
            UDIV_QRNND_PREINV( quot_ptr[0], r, r,
                               n1 << normalization_steps,
                               divisor_limb, divisor_limb_inverted);
            return r >> normalization_steps;
        }
        else {
            mpi_limb_t divisor_limb_inverted;

            /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
             * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
             * most significant bit (with weight 2**N) implicit.
             */
            /* Special case for DIVISOR_LIMB == 100...000.  */
            if( !(divisor_limb << 1) )
                divisor_limb_inverted = ~(mpi_limb_t) 0;
            else
                udiv_qrnnd(divisor_limb_inverted, dummy,
                           -divisor_limb, 0, divisor_limb);

            i = dividend_size - 1;
            r = dividend_ptr[i];

            /* A top limb smaller than the divisor gives a zero top
             * quotient limb and becomes the initial remainder.  */
            if( r >= divisor_limb )
                r = 0;
            else
                quot_ptr[i--] = 0;

            for( ; i >= 0; i-- ) {
                n0 = dividend_ptr[i];
                UDIV_QRNND_PREINV( quot_ptr[i], r, r,
                                   n0, divisor_limb, divisor_limb_inverted);
            }
            return r;
        }
    }
    else {
        if(UDIV_NEEDS_NORMALIZATION) {
            int normalization_steps;

            count_leading_zeros (normalization_steps, divisor_limb);
            if( normalization_steps ) {
                divisor_limb <<= normalization_steps;

                n1 = dividend_ptr[dividend_size - 1];
                r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

                /* Possible optimization:
                 * if (r == 0
                 * && divisor_limb > ((n1 << normalization_steps)
                 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
                 * ...one division less...
                 */
                for( i = dividend_size - 2; i >= 0; i--) {
                    n0 = dividend_ptr[i];
                    udiv_qrnnd (quot_ptr[i + 1], r, r,
                                ((n1 << normalization_steps)
                         | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
                                divisor_limb);
                    n1 = n0;
                }
                udiv_qrnnd (quot_ptr[0], r, r,
                            n1 << normalization_steps,
                            divisor_limb);
                return r >> normalization_steps;
            }
        }
        /* No normalization needed, either because udiv_qrnnd doesn't require
         * it, or because DIVISOR_LIMB is already normalized.  */
        i = dividend_size - 1;
        r = dividend_ptr[i];

        /* Same shortcut as above for a small top limb.  */
        if(r >= divisor_limb)
            r = 0;
        else
            quot_ptr[i--] = 0;

        for(; i >= 0; i--) {
            n0 = dividend_ptr[i];
            udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb );
        }
        return r;
    }
}
/* Multiply the SIZE limb operands UP and VP into PRODP.  Operands
 * shorter than KARATSUBA_THRESHOLD limbs go to mul_n_basecase, all
 * others to mul_n, which additionally receives the scratch area TSPACE.
 *
 * The macro expands to a single statement without a trailing
 * semicolon, so that "MPN_MUL_N_RECURSE (...);" can be used wherever a
 * statement is allowed, including the branch of an if/else.  SIZE is
 * evaluated twice.
 */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
    do {                                               \
        if( (size) < KARATSUBA_THRESHOLD )             \
            mul_n_basecase (prodp, up, vp, size);      \
        else                                           \
            mul_n (prodp, up, vp, size, tspace);       \
    } while (0)

/* Square the SIZE limb operand UP into PRODP.  Operands shorter than
 * KARATSUBA_THRESHOLD limbs go to _gcry_mpih_sqr_n_basecase, all others
 * to _gcry_mpih_sqr_n, which additionally receives the scratch area
 * TSPACE.  Like MPN_MUL_N_RECURSE this expands to a single statement
 * without a trailing semicolon; SIZE is evaluated twice.
 */
#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)         \
    do {                                                   \
        if ((size) < KARATSUBA_THRESHOLD)                  \
            _gcry_mpih_sqr_n_basecase (prodp, up, size);   \
        else                                               \
            _gcry_mpih_sqr_n (prodp, up, size, tspace);    \
    } while (0)
*/ + v_limb = vp[0]; + if( v_limb <= 1 ) { + if( v_limb == 1 ) + MPN_COPY( prodp, up, size ); + else + MPN_ZERO( prodp, size ); + cy = 0; + } + else + cy = _gcry_mpih_mul_1( prodp, up, size, v_limb ); + + prodp[size] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for( i = 1; i < size; i++ ) { + v_limb = vp[i]; + if( v_limb <= 1 ) { + cy = 0; + if( v_limb == 1 ) + cy = _gcry_mpih_add_n(prodp, prodp, up, size); + } + else + cy = _gcry_mpih_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy; + prodp++; + } + + return cy; +} + + +static void +mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t size, mpi_ptr_t tspace ) +{ + if( size & 1 ) { + /* The size is odd, and the code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace ); + cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, vp[esize] ); + prodp[esize + esize] = cy_limb; + cy_limb = _gcry_mpih_addmul_1( prodp + esize, vp, size, up[esize] ); + prodp[esize + size] = cy_limb; + } + else { + /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. + * + * Split U in two pieces, U1 and U0, such that + * U = U0 + U1*(B**n), + * and V in V1 and V0, such that + * V = V0 + V1*(B**n). 
+ * + * UV is then computed recursively using the identity + * + * 2n n n n + * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V + * 1 1 1 0 0 1 0 0 + * + * Where B = 2**BITS_PER_MP_LIMB. + */ + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + int negflg; + + /* Product H. ________________ ________________ + * |_____U1 x V1____||____U0 x V0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace); + + /* Product M. ________________ + * |_(U1-U0)(V0-V1)_| + */ + if( _gcry_mpih_cmp(up + hsize, up, hsize) >= 0 ) { + _gcry_mpih_sub_n(prodp, up + hsize, up, hsize); + negflg = 0; + } + else { + _gcry_mpih_sub_n(prodp, up, up + hsize, hsize); + negflg = 1; + } + if( _gcry_mpih_cmp(vp + hsize, vp, hsize) >= 0 ) { + _gcry_mpih_sub_n(prodp + hsize, vp + hsize, vp, hsize); + negflg ^= 1; + } + else { + _gcry_mpih_sub_n(prodp + hsize, vp, vp + hsize, hsize); + /* No change of NEGFLG. */ + } + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size); + + /* Add/copy product H. */ + MPN_COPY (prodp + hsize, prodp + size, hsize); + cy = _gcry_mpih_add_n( prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number) */ + if(negflg) + cy -= _gcry_mpih_sub_n(prodp + hsize, prodp + hsize, tspace, size); + else + cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size); + + /* Product L. ________________ ________________ + * |________________||____U0 x V0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. 
+ */ + MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); + + /* Add/copy Product L (twice) */ + + cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size); + if( cy ) + _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if( cy ) + _gcry_mpih_add_1(prodp + size, prodp + size, size, 1); + } +} + + +void +_gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) +{ + mpi_size_t i; + mpi_limb_t cy_limb; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = up[0]; + if( v_limb <= 1 ) { + if( v_limb == 1 ) + MPN_COPY( prodp, up, size ); + else + MPN_ZERO(prodp, size); + cy_limb = 0; + } + else + cy_limb = _gcry_mpih_mul_1( prodp, up, size, v_limb ); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for( i=1; i < size; i++) { + v_limb = up[i]; + if( v_limb <= 1 ) { + cy_limb = 0; + if( v_limb == 1 ) + cy_limb = _gcry_mpih_add_n(prodp, prodp, up, size); + } + else + cy_limb = _gcry_mpih_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + + +void +_gcry_mpih_sqr_n( mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) +{ + if( size & 1 ) { + /* The size is odd, and the code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. 
+ * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_SQR_N_RECURSE( prodp, up, esize, tspace ); + cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, up[esize] ); + prodp[esize + esize] = cy_limb; + cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, size, up[esize] ); + + prodp[esize + size] = cy_limb; + } + else { + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + + /* Product H. ________________ ________________ + * |_____U1 x U1____||____U0 x U0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); + + /* Product M. ________________ + * |_(U1-U0)(U0-U1)_| + */ + if( _gcry_mpih_cmp( up + hsize, up, hsize) >= 0 ) + _gcry_mpih_sub_n( prodp, up + hsize, up, hsize); + else + _gcry_mpih_sub_n (prodp, up, up + hsize, hsize); + + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); + + /* Add/copy product H */ + MPN_COPY(prodp + hsize, prodp + size, hsize); + cy = _gcry_mpih_add_n(prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number). */ + cy -= _gcry_mpih_sub_n (prodp + hsize, prodp + hsize, tspace, size); + + /* Product L. ________________ ________________ + * |________________||____U0 x U0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size); + + /* Add/copy Product L (twice). 
*/ + cy += _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace, size); + if( cy ) + _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, + hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if( cy ) + _gcry_mpih_add_1 (prodp + size, prodp + size, size, 1); + } +} + + +/* This should be made into an inline function in gmp.h. */ +void +_gcry_mpih_mul_n( mpi_ptr_t prodp, + mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ + int secure; + + if( up == vp ) { + if( size < KARATSUBA_THRESHOLD ) + _gcry_mpih_sqr_n_basecase( prodp, up, size ); + else { + mpi_ptr_t tspace; + secure = _gcry_is_secure( up ); + tspace = mpi_alloc_limb_space( 2 * size, secure ); + _gcry_mpih_sqr_n( prodp, up, size, tspace ); + _gcry_mpi_free_limb_space (tspace, 2 * size ); + } + } + else { + if( size < KARATSUBA_THRESHOLD ) + mul_n_basecase( prodp, up, vp, size ); + else { + mpi_ptr_t tspace; + secure = _gcry_is_secure( up ) || _gcry_is_secure( vp ); + tspace = mpi_alloc_limb_space( 2 * size, secure ); + mul_n (prodp, up, vp, size, tspace); + _gcry_mpi_free_limb_space (tspace, 2 * size ); + } + } +} + + + +void +_gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx ) +{ + mpi_limb_t cy; + + if( !ctx->tspace || ctx->tspace_size < vsize ) { + if( ctx->tspace ) + _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); + ctx->tspace_nlimbs = 2 * vsize; + ctx->tspace = mpi_alloc_limb_space (2 * vsize, + (_gcry_is_secure (up) + || _gcry_is_secure (vp))); + ctx->tspace_size = vsize; + } + + MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace ); + + prodp += vsize; + up += vsize; + usize -= vsize; + if( usize >= vsize ) { + if( !ctx->tp || ctx->tp_size < vsize ) { + if( ctx->tp ) + _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); + ctx->tp_nlimbs = 2 * vsize; + ctx->tp = mpi_alloc_limb_space (2 * vsize, + 
(_gcry_is_secure (up) + || _gcry_is_secure (vp))); + ctx->tp_size = vsize; + } + + do { + MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace ); + cy = _gcry_mpih_add_n( prodp, prodp, ctx->tp, vsize ); + _gcry_mpih_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy ); + prodp += vsize; + up += vsize; + usize -= vsize; + } while( usize >= vsize ); + } + + if( usize ) { + if( usize < KARATSUBA_THRESHOLD ) { + _gcry_mpih_mul( ctx->tspace, vp, vsize, up, usize ); + } + else { + if( !ctx->next ) { + ctx->next = xcalloc( 1, sizeof *ctx ); + } + _gcry_mpih_mul_karatsuba_case( ctx->tspace, + vp, vsize, + up, usize, + ctx->next ); + } + + cy = _gcry_mpih_add_n( prodp, prodp, ctx->tspace, vsize); + _gcry_mpih_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy ); + } +} + + +void +_gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx ) +{ + struct karatsuba_ctx *ctx2; + + if( ctx->tp ) + _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); + if( ctx->tspace ) + _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); + for( ctx=ctx->next; ctx; ctx = ctx2 ) { + ctx2 = ctx->next; + if( ctx->tp ) + _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); + if( ctx->tspace ) + _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); + xfree( ctx ); + } +} + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + * and v (pointed to by VP, with VSIZE limbs), and store the result at + * PRODP. USIZE + VSIZE limbs are always stored, but if the input + * operands are normalized. Return the most significant limb of the + * result. + * + * NOTE: The space pointed to by PRODP is overwritten before finished + * with U and V, so overlap is an error. + * + * Argument constraints: + * 1. USIZE >= VSIZE. + * 2. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. 
+ */ + +mpi_limb_t +_gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize) +{ + mpi_ptr_t prod_endp = prodp + usize + vsize - 1; + mpi_limb_t cy; + struct karatsuba_ctx ctx; + + if( vsize < KARATSUBA_THRESHOLD ) { + mpi_size_t i; + mpi_limb_t v_limb; + + if( !vsize ) + return 0; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if( v_limb <= 1 ) { + if( v_limb == 1 ) + MPN_COPY( prodp, up, usize ); + else + MPN_ZERO( prodp, usize ); + cy = 0; + } + else + cy = _gcry_mpih_mul_1( prodp, up, usize, v_limb ); + + prodp[usize] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for( i = 1; i < vsize; i++ ) { + v_limb = vp[i]; + if( v_limb <= 1 ) { + cy = 0; + if( v_limb == 1 ) + cy = _gcry_mpih_add_n(prodp, prodp, up, usize); + } + else + cy = _gcry_mpih_addmul_1(prodp, up, usize, v_limb); + + prodp[usize] = cy; + prodp++; + } + + return cy; + } + + memset( &ctx, 0, sizeof ctx ); + _gcry_mpih_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx ); + _gcry_mpih_release_karatsuba_ctx( &ctx ); + return *prod_endp; +} diff --git a/comm/third_party/libgcrypt/mpi/mpiutil.c b/comm/third_party/libgcrypt/mpi/mpiutil.c new file mode 100644 index 0000000000..86b8361e6c --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/mpiutil.c @@ -0,0 +1,780 @@ +/* mpiutil.ac - Utility functions for MPI + * Copyright (C) 1998, 2000, 2001, 2002, 2003, + * 2007 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi-internal.h"
+#include "mod-source-info.h"
+
+
+/* MY_UINT_MAX is the all-ones value for the configured width of
+   "unsigned int" (SIZEOF_UNSIGNED_INT is 2, 4 or 8 bytes).  Any other
+   width stops the build.  _gcry_mpi_get_ui in this file uses it as
+   the upper bound for values that fit into an unsigned int.  */
+#if SIZEOF_UNSIGNED_INT == 2
+# define MY_UINT_MAX 0xffff
+/* (visual check:      0123 ) */
+#elif SIZEOF_UNSIGNED_INT == 4
+# define MY_UINT_MAX 0xffffffff
+/* (visual check:      01234567 ) */
+#elif SIZEOF_UNSIGNED_INT == 8
+# define MY_UINT_MAX 0xffffffffffffffff
+/* (visual check:      0123456789abcdef ) */
+#else
+# error Need MY_UINT_MAX for this limb size
+#endif
+
+
+/* Constants allocated right away at startup.  The table has
+   MPI_NUMBER_OF_CONSTANTS slots, so valid indices are
+   0 .. MPI_NUMBER_OF_CONSTANTS-1.  */
+static gcry_mpi_t constants[MPI_NUMBER_OF_CONSTANTS];
+
+
+
+/* Return the string mod_source_info with its first character skipped.
+   NOTE(review): mod_source_info comes from mod-source-info.h, which is
+   not visible here; presumably it describes the MPI modules selected
+   at build time - confirm against that header.  */
+const char *
+_gcry_mpi_get_hw_config (void)
+{
+  return mod_source_info + 1;
+}
+
+
+/* Initialize the MPI subsystem.  This is called early and allows to
+   do some initialization without taking care of threading issues.
*/ +gcry_err_code_t +_gcry_mpi_init (void) +{ + int idx; + unsigned long value; + + for (idx=0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) + { + switch (idx) + { + case MPI_C_ZERO: value = 0; break; + case MPI_C_ONE: value = 1; break; + case MPI_C_TWO: value = 2; break; + case MPI_C_THREE: value = 3; break; + case MPI_C_FOUR: value = 4; break; + case MPI_C_EIGHT: value = 8; break; + default: log_bug ("invalid mpi_const selector %d\n", idx); + } + constants[idx] = mpi_alloc_set_ui (value); + constants[idx]->flags = (16|32); + } + + return 0; +} + + +/**************** + * Note: It was a bad idea to use the number of limbs to allocate + * because on a alpha the limbs are large but we normally need + * integers of n bits - So we should change this to bits (or bytes). + * + * But mpi_alloc is used in a lot of places :-(. New code + * should use mpi_new. + */ +gcry_mpi_t +_gcry_mpi_alloc( unsigned nlimbs ) +{ + gcry_mpi_t a; + + a = xmalloc( sizeof *a ); + a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 0 ) : NULL; + a->alloced = nlimbs; + a->nlimbs = 0; + a->sign = 0; + a->flags = 0; + return a; +} + +void +_gcry_mpi_m_check( gcry_mpi_t a ) +{ + _gcry_check_heap(a); + _gcry_check_heap(a->d); +} + +gcry_mpi_t +_gcry_mpi_alloc_secure( unsigned nlimbs ) +{ + gcry_mpi_t a; + + a = xmalloc( sizeof *a ); + a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 1 ) : NULL; + a->alloced = nlimbs; + a->flags = 1; + a->nlimbs = 0; + a->sign = 0; + return a; +} + + + +mpi_ptr_t +_gcry_mpi_alloc_limb_space( unsigned int nlimbs, int secure ) +{ + mpi_ptr_t p; + size_t len; + + len = (nlimbs ? nlimbs : 1) * sizeof (mpi_limb_t); + p = secure ? xmalloc_secure (len) : xmalloc (len); + if (! nlimbs) + *p = 0; + + return p; +} + +void +_gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs) +{ + if (a) + { + size_t len = nlimbs * sizeof(mpi_limb_t); + + /* If we have information on the number of allocated limbs, we + better wipe that space out. 
This is a failsafe feature if + secure memory has been disabled or was not properly + implemented in user provided allocation functions. */ + if (len) + wipememory (a, len); + xfree(a); + } +} + + +void +_gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned int nlimbs ) +{ + _gcry_mpi_free_limb_space (a->d, a->alloced); + a->d = ap; + a->alloced = nlimbs; +} + + + +/**************** + * Resize the array of A to NLIMBS. The additional space is cleared + * (set to 0). + */ +void +_gcry_mpi_resize (gcry_mpi_t a, unsigned nlimbs) +{ + size_t i; + + if (nlimbs <= a->alloced) + { + /* We only need to clear the new space (this is a nop if the + limb space is already of the correct size. */ + for (i=a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + return; + } + + /* Actually resize the limb space. */ + if (a->d) + { + a->d = xrealloc (a->d, nlimbs * sizeof (mpi_limb_t)); + for (i=a->alloced; i < nlimbs; i++) + a->d[i] = 0; + } + else + { + if (a->flags & 1) + /* Secure memory is wanted. */ + a->d = xcalloc_secure (nlimbs , sizeof (mpi_limb_t)); + else + /* Standard memory. */ + a->d = xcalloc (nlimbs , sizeof (mpi_limb_t)); + } + a->alloced = nlimbs; +} + +void +_gcry_mpi_clear( gcry_mpi_t a ) +{ + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return; + } + a->nlimbs = 0; + a->flags = 0; +} + + +void +_gcry_mpi_free( gcry_mpi_t a ) +{ + if (!a ) + return; + if ((a->flags & 32)) + { +#if GPGRT_VERSION_NUMBER >= 0x011600 /* 1.22 */ + gpgrt_annotate_leaked_object(a); +#endif + return; /* Never release a constant. */ + } + if ((a->flags & 4)) + xfree( a->d ); + else + { + _gcry_mpi_free_limb_space(a->d, a->alloced); + } + /* Check that the flags makes sense. We better allow for bit 1 + (value 2) for backward ABI compatibility. 
*/ + if ((a->flags & ~(1|2|4|16 + |GCRYMPI_FLAG_USER1 + |GCRYMPI_FLAG_USER2 + |GCRYMPI_FLAG_USER3 + |GCRYMPI_FLAG_USER4))) + log_bug("invalid flag value in mpi_free\n"); + xfree (a); +} + + +void +_gcry_mpi_immutable_failed (void) +{ + log_info ("Warning: trying to change an immutable MPI\n"); +} + + +static void +mpi_set_secure( gcry_mpi_t a ) +{ + mpi_ptr_t ap, bp; + + if ( (a->flags & 1) ) + return; + a->flags |= 1; + ap = a->d; + if (!a->nlimbs) + { + gcry_assert (!ap); + return; + } + bp = mpi_alloc_limb_space (a->alloced, 1); + MPN_COPY( bp, ap, a->nlimbs ); + a->d = bp; + _gcry_mpi_free_limb_space (ap, a->alloced); +} + + +gcry_mpi_t +_gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int nbits) +{ + if (!a) + a = mpi_alloc(0); + + if (mpi_is_immutable (a)) + { + mpi_immutable_failed (); + return a; + } + + if( a->flags & 4 ) + xfree (a->d); + else + _gcry_mpi_free_limb_space (a->d, a->alloced); + + a->d = p; + a->alloced = 0; + a->nlimbs = 0; + a->sign = nbits; + a->flags = 4 | (a->flags & (GCRYMPI_FLAG_USER1|GCRYMPI_FLAG_USER2 + |GCRYMPI_FLAG_USER3|GCRYMPI_FLAG_USER4)); + if (_gcry_is_secure (a->d)) + a->flags |= 1; + return a; +} + + +gcry_mpi_t +_gcry_mpi_set_opaque_copy (gcry_mpi_t a, const void *p, unsigned int nbits) +{ + void *d; + unsigned int n; + + n = (nbits+7)/8; + d = _gcry_is_secure (p)? xtrymalloc_secure (n) : xtrymalloc (n); + if (!d) + return NULL; + memcpy (d, p, n); + return mpi_set_opaque (a, d, nbits); +} + + +void * +_gcry_mpi_get_opaque (gcry_mpi_t a, unsigned int *nbits) +{ + if( !(a->flags & 4) ) + log_bug("mpi_get_opaque on normal mpi\n"); + if( nbits ) + *nbits = a->sign; + return a->d; +} + + +void * +_gcry_mpi_get_opaque_copy (gcry_mpi_t a, unsigned int *nbits) +{ + const void *s; + void *d; + unsigned int n; + + s = mpi_get_opaque (a, nbits); + if (!s && nbits) + return NULL; + n = (*nbits+7)/8; + d = _gcry_is_secure (s)? 
xtrymalloc_secure (n) : xtrymalloc (n); + if (d) + memcpy (d, s, n); + return d; +} + +/**************** + * Note: This copy function should not interpret the MPI + * but copy it transparently. + */ +gcry_mpi_t +_gcry_mpi_copy (gcry_mpi_t a) +{ + int i; + gcry_mpi_t b; + + if( a && (a->flags & 4) ) { + void *p = _gcry_is_secure(a->d)? xmalloc_secure ((a->sign+7)/8) + : xmalloc ((a->sign+7)/8); + if (a->d) + memcpy( p, a->d, (a->sign+7)/8 ); + b = mpi_set_opaque( NULL, p, a->sign ); + b->flags = a->flags; + b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + } + else if( a ) { + b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs ) + : mpi_alloc( a->nlimbs ); + b->nlimbs = a->nlimbs; + b->sign = a->sign; + b->flags = a->flags; + b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + for(i=0; i < b->nlimbs; i++ ) + b->d[i] = a->d[i]; + } + else + b = NULL; + return b; +} + + +/* Return true if A is negative. */ +int +_gcry_mpi_is_neg (gcry_mpi_t a) +{ + if (a->sign && _gcry_mpi_cmp_ui (a, 0)) + return 1; + else + return 0; +} + + +/* W = - U */ +void +_gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u) +{ + if (w != u) + mpi_set (w, u); + else if (mpi_is_immutable (w)) + { + mpi_immutable_failed (); + return; + } + + w->sign = !u->sign; +} + + +/* W = [W] */ +void +_gcry_mpi_abs (gcry_mpi_t w) +{ + if (mpi_is_immutable (w)) + { + mpi_immutable_failed (); + return; + } + + w->sign = 0; +} + + +/**************** + * This function allocates an MPI which is optimized to hold + * a value as large as the one given in the argument and allocates it + * with the same flags as A. + */ +gcry_mpi_t +_gcry_mpi_alloc_like( gcry_mpi_t a ) +{ + gcry_mpi_t b; + + if( a && (a->flags & 4) ) { + int n = (a->sign+7)/8; + void *p = _gcry_is_secure(a->d)? xtrymalloc_secure (n) + : xtrymalloc (n); + memcpy( p, a->d, n ); + b = mpi_set_opaque( NULL, p, a->sign ); + } + else if( a ) { + b = mpi_is_secure(a)? 
mpi_alloc_secure( a->nlimbs ) + : mpi_alloc( a->nlimbs ); + b->nlimbs = 0; + b->sign = 0; + b->flags = a->flags; + } + else + b = NULL; + return b; +} + + +/* Set U into W and release U. If W is NULL only U will be released. */ +void +_gcry_mpi_snatch (gcry_mpi_t w, gcry_mpi_t u) +{ + if (w) + { + if (mpi_is_immutable (w)) + { + mpi_immutable_failed (); + return; + } + _gcry_mpi_assign_limb_space (w, u->d, u->alloced); + w->nlimbs = u->nlimbs; + w->sign = u->sign; + w->flags = u->flags; + u->alloced = 0; + u->nlimbs = 0; + u->d = NULL; + } + _gcry_mpi_free (u); +} + + +gcry_mpi_t +_gcry_mpi_set (gcry_mpi_t w, gcry_mpi_t u) +{ + mpi_ptr_t wp, up; + mpi_size_t usize = u->nlimbs; + int usign = u->sign; + + if (!w) + w = _gcry_mpi_alloc( mpi_get_nlimbs(u) ); + if (mpi_is_immutable (w)) + { + mpi_immutable_failed (); + return w; + } + RESIZE_IF_NEEDED(w, usize); + wp = w->d; + up = u->d; + MPN_COPY( wp, up, usize ); + w->nlimbs = usize; + w->flags = u->flags; + w->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + w->sign = usign; + return w; +} + +/**************** + * Set the value of W by the one of U, when SET is 1. + * Leave the value when SET is 0. + * This implementation should be constant-time regardless of SET. + */ +gcry_mpi_t +_gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u, unsigned long set) +{ + mpi_size_t i; + mpi_size_t nlimbs = u->alloced; + mpi_limb_t mask = ((mpi_limb_t)0) - set; + mpi_limb_t x; + + if (w->alloced != u->alloced) + log_bug ("mpi_set_cond: different sizes\n"); + + for (i = 0; i < nlimbs; i++) + { + x = mask & (w->d[i] ^ u->d[i]); + w->d[i] = w->d[i] ^ x; + } + + x = mask & (w->nlimbs ^ u->nlimbs); + w->nlimbs = w->nlimbs ^ x; + + x = mask & (w->sign ^ u->sign); + w->sign = w->sign ^ x; + return w; +} + + +gcry_mpi_t +_gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u) +{ + if (!w) + w = _gcry_mpi_alloc (1); + /* FIXME: If U is 0 we have no need to resize and thus possible + allocating the the limbs. 
*/ + if (mpi_is_immutable (w)) + { + mpi_immutable_failed (); + return w; + } + RESIZE_IF_NEEDED(w, 1); + w->d[0] = u; + w->nlimbs = u? 1:0; + w->sign = 0; + w->flags = 0; + return w; +} + +/* If U is non-negative and small enough store it as an unsigned int + * at W. If the value does not fit into an unsigned int or is + * negative return GPG_ERR_ERANGE. Note that we return an unsigned + * int so that the value can be used with the bit test functions; in + * contrast the other _ui functions take an unsigned long so that on + * some platforms they may accept a larger value. On error the value + * at W is not changed. */ +gcry_err_code_t +_gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u) +{ + mpi_limb_t x; + + if (u->nlimbs > 1 || u->sign) + return GPG_ERR_ERANGE; + + x = (u->nlimbs == 1) ? u->d[0] : 0; + if (sizeof (x) > sizeof (unsigned int) && x > MY_UINT_MAX) + return GPG_ERR_ERANGE; + + *w = x; + return 0; +} + + +gcry_mpi_t +_gcry_mpi_alloc_set_ui( unsigned long u) +{ + gcry_mpi_t w = mpi_alloc(1); + w->d[0] = u; + w->nlimbs = u? 1:0; + w->sign = 0; + return w; +} + +void +_gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b) +{ + struct gcry_mpi tmp; + + tmp = *a; *a = *b; *b = tmp; +} + + +/**************** + * Swap the value of A and B, when SWAP is 1. + * Leave the value when SWAP is 0. + * This implementation should be constant-time regardless of SWAP. 
+ */ +void +_gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap) +{ + mpi_size_t i; + mpi_size_t nlimbs; + mpi_limb_t mask = ((mpi_limb_t)0) - swap; + mpi_limb_t x; + + if (a->alloced > b->alloced) + nlimbs = b->alloced; + else + nlimbs = a->alloced; + if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) + log_bug ("mpi_swap_cond: different sizes\n"); + + for (i = 0; i < nlimbs; i++) + { + x = mask & (a->d[i] ^ b->d[i]); + a->d[i] = a->d[i] ^ x; + b->d[i] = b->d[i] ^ x; + } + + x = mask & (a->nlimbs ^ b->nlimbs); + a->nlimbs = a->nlimbs ^ x; + b->nlimbs = b->nlimbs ^ x; + + x = mask & (a->sign ^ b->sign); + a->sign = a->sign ^ x; + b->sign = b->sign ^ x; +} + + +/**************** + * Set bit N of A, when SET is 1. + * This implementation should be constant-time regardless of SET. + */ +void +_gcry_mpi_set_bit_cond (gcry_mpi_t a, unsigned int n, unsigned long set) +{ + unsigned int limbno, bitno; + mpi_limb_t set_the_bit = !!set; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + a->d[limbno] |= (set_the_bit<<bitno); +} + + +gcry_mpi_t +_gcry_mpi_new (unsigned int nbits) +{ + return _gcry_mpi_alloc ( (nbits+BITS_PER_MPI_LIMB-1) + / BITS_PER_MPI_LIMB ); +} + + +gcry_mpi_t +_gcry_mpi_snew (unsigned int nbits) +{ + return _gcry_mpi_alloc_secure ( (nbits+BITS_PER_MPI_LIMB-1) + / BITS_PER_MPI_LIMB ); +} + +void +_gcry_mpi_release( gcry_mpi_t a ) +{ + _gcry_mpi_free( a ); +} + +void +_gcry_mpi_randomize (gcry_mpi_t w, + unsigned int nbits, enum gcry_random_level level) +{ + unsigned char *p; + size_t nbytes = (nbits+7)/8; + + if (mpi_is_immutable (w)) + { + mpi_immutable_failed (); + return; + } + if (level == GCRY_WEAK_RANDOM) + { + p = mpi_is_secure(w) ? xmalloc_secure (nbytes) + : xmalloc (nbytes); + _gcry_create_nonce (p, nbytes); + } + else + { + p = mpi_is_secure(w) ? 
_gcry_random_bytes_secure (nbytes, level) + : _gcry_random_bytes (nbytes, level); + } + _gcry_mpi_set_buffer( w, p, nbytes, 0 ); + xfree (p); +} + + +void +_gcry_mpi_set_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) +{ + switch (flag) + { + case GCRYMPI_FLAG_SECURE: mpi_set_secure(a); break; + case GCRYMPI_FLAG_CONST: a->flags |= (16|32); break; + case GCRYMPI_FLAG_IMMUTABLE: a->flags |= 16; break; + + case GCRYMPI_FLAG_USER1: + case GCRYMPI_FLAG_USER2: + case GCRYMPI_FLAG_USER3: + case GCRYMPI_FLAG_USER4: a->flags |= flag; break; + + case GCRYMPI_FLAG_OPAQUE: + default: log_bug("invalid flag value\n"); + } +} + +void +_gcry_mpi_clear_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) +{ + (void)a; /* Not yet used. */ + + switch (flag) + { + case GCRYMPI_FLAG_IMMUTABLE: + if (!(a->flags & 32)) + a->flags &= ~16; + break; + + case GCRYMPI_FLAG_USER1: + case GCRYMPI_FLAG_USER2: + case GCRYMPI_FLAG_USER3: + case GCRYMPI_FLAG_USER4: + a->flags &= ~flag; + break; + + case GCRYMPI_FLAG_CONST: + case GCRYMPI_FLAG_SECURE: + case GCRYMPI_FLAG_OPAQUE: + default: log_bug("invalid flag value\n"); + } +} + +int +_gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag) +{ + switch (flag) + { + case GCRYMPI_FLAG_SECURE: return !!(a->flags & 1); + case GCRYMPI_FLAG_OPAQUE: return !!(a->flags & 4); + case GCRYMPI_FLAG_IMMUTABLE: return !!(a->flags & 16); + case GCRYMPI_FLAG_CONST: return !!(a->flags & 32); + case GCRYMPI_FLAG_USER1: + case GCRYMPI_FLAG_USER2: + case GCRYMPI_FLAG_USER3: + case GCRYMPI_FLAG_USER4: return !!(a->flags & flag); + default: log_bug("invalid flag value\n"); + } + /*NOTREACHED*/ + return 0; +} + + +/* Return a constant MPI descripbed by NO which is one of the + MPI_C_xxx macros. There is no need to copy this returned value; it + may be used directly. 
*/ +gcry_mpi_t +_gcry_mpi_const (enum gcry_mpi_constants no) +{ + if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS) + log_bug("invalid mpi_const selector %d\n", no); + if (!constants[no]) + log_bug("MPI subsystem not initialized\n"); + return constants[no]; +} diff --git a/comm/third_party/libgcrypt/mpi/pa7100/distfiles b/comm/third_party/libgcrypt/mpi/pa7100/distfiles new file mode 100644 index 0000000000..fece94310d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pa7100/distfiles @@ -0,0 +1,3 @@ +mpih-lshift.S +mpih-rshift.S + diff --git a/comm/third_party/libgcrypt/mpi/pa7100/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/pa7100/mpih-lshift.S new file mode 100644 index 0000000000..8ade19643e --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pa7100/mpih-lshift.S @@ -0,0 +1,96 @@ +/* hppa lshift + * optimized for the PA7100, where it runs at 3.25 cycles/limb + * + * Copyright (C) 1992, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_lshift( mpi_ptr_t wp,      (gr26)
+ *                    mpi_ptr_t up,      (gr25)
+ *                    mpi_size_t usize,  (gr24)
+ *                    unsigned cnt)      (gr23)
+ */
+
+/* NOTE(review): the register roles used in the remarks below come from
+ * the prototype comment above (gr26 = wp, gr25 = up, gr24 = usize,
+ * gr23 = cnt).  The remarks on individual instructions are a reading
+ * of the mnemonics only - confirm against the PA-RISC manual.  */
+
+        .code
+        .export         _gcry_mpih_lshift
+        .label          _gcry_mpih_lshift
+        .proc
+        .callinfo       frame=64,no_calls
+        .entry
+
+        /* Setup: both pointers are first combined with usize, and all
+         * loads and stores below use a -4 displacement, so the limbs
+         * appear to be processed from the most significant end
+         * downwards.  The shift amount register is loaded with
+         * 32 - cnt.  */
+        sh2add          %r24,%r25,%r25
+        sh2add          %r24,%r26,%r26
+        ldws,mb         -4(0,%r25),%r22
+        subi            32,%r23,%r1
+        mtsar           %r1
+        addib,=         -1,%r24,L$0004
+        vshd            %r0,%r22,%r28           ; compute carry out limb
+        ldws,mb         -4(0,%r25),%r29
+        addib,<=        -5,%r24,L$rest
+        vshd            %r22,%r29,%r20
+
+        /* Main loop: four load/store/shift groups per pass, with the
+         * limb counter in %r24 stepped by -4.  */
+        .label  L$loop
+        ldws,mb         -4(0,%r25),%r22
+        stws,mb         %r20,-4(0,%r26)
+        vshd            %r29,%r22,%r20
+        ldws,mb         -4(0,%r25),%r29
+        stws,mb         %r20,-4(0,%r26)
+        vshd            %r22,%r29,%r20
+        ldws,mb         -4(0,%r25),%r22
+        stws,mb         %r20,-4(0,%r26)
+        vshd            %r29,%r22,%r20
+        ldws,mb         -4(0,%r25),%r29
+        stws,mb         %r20,-4(0,%r26)
+        addib,>         -4,%r24,L$loop
+        vshd            %r22,%r29,%r20
+
+        /* Remaining limbs after the unrolled loop, handled one load at
+         * a time; %r22 and %r29 alternate as the most recent limb.  */
+        .label  L$rest
+        addib,=         4,%r24,L$end1
+        nop
+        .label  L$eloop
+        ldws,mb         -4(0,%r25),%r22
+        stws,mb         %r20,-4(0,%r26)
+        addib,<=        -1,%r24,L$end2
+        vshd            %r29,%r22,%r20
+        ldws,mb         -4(0,%r25),%r29
+        stws,mb         %r20,-4(0,%r26)
+        addib,>         -1,%r24,L$eloop
+        vshd            %r22,%r29,%r20
+
+        /* Exit paths: store the pending result limb, form the last
+         * limb by shifting in %r0, and return through %r2.  The stw
+         * after each bv presumably sits in the branch delay slot -
+         * TODO confirm.  */
+        .label  L$end1
+        stws,mb         %r20,-4(0,%r26)
+        vshd            %r29,%r0,%r20
+        bv              0(%r2)
+        stw             %r20,-4(0,%r26)
+        .label  L$end2
+        stws,mb         %r20,-4(0,%r26)
+        .label  L$0004
+        vshd            %r22,%r0,%r20
+        bv              0(%r2)
+        stw             %r20,-4(0,%r26)
+
+        .exit
+        .procend
+
+
+
diff --git a/comm/third_party/libgcrypt/mpi/pa7100/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/pa7100/mpih-rshift.S
new file mode 100644
index 0000000000..0624202725
--- /dev/null
+++ b/comm/third_party/libgcrypt/mpi/pa7100/mpih-rshift.S
@@ -0,0 +1,92 @@
+/* hppa   rshift
+ * optimized for the PA7100, where it runs at 3.25 cycles/limb
+ *
+ *      Copyright (C) 1992, 1994, 1998,
+ *                    2001, 2002
Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (gr26) + * mpi_ptr_t up, (gr25) + * mpi_size_t usize, (gr24) + * unsigned cnt) (gr23) + */ + + .code + .export _gcry_mpih_rshift + .label _gcry_mpih_rshift + .proc + .callinfo frame=64,no_calls + .entry + + ldws,ma 4(0,%r25),%r22 + mtsar %r23 + addib,= -1,%r24,L$r004 + vshd %r22,%r0,%r28 ; compute carry out limb + ldws,ma 4(0,%r25),%r29 + addib,<= -5,%r24,L$rrest + vshd %r29,%r22,%r20 + + .label L$roop + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + vshd %r29,%r22,%r20 + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -4,%r24,L$roop + vshd %r29,%r22,%r20 + + .label L$rrest + addib,= 4,%r24,L$rend1 + nop + .label L$eroop + ldws,ma 4(0,%r25),%r22 + stws,ma %r20,4(0,%r26) + addib,<= -1,%r24,L$rend2 + vshd %r22,%r29,%r20 + ldws,ma 4(0,%r25),%r29 + stws,ma %r20,4(0,%r26) + addib,> -1,%r24,L$eroop + vshd %r29,%r22,%r20 + + .label L$rend1 + stws,ma %r20,4(0,%r26) + vshd %r0,%r29,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + .label L$rend2 
+ stws,ma %r20,4(0,%r26) + .label L$r004 + vshd %r0,%r22,%r20 + bv 0(%r2) + stw %r20,0(0,%r26) + + .exit + .procend + + diff --git a/comm/third_party/libgcrypt/mpi/pentium4/README b/comm/third_party/libgcrypt/mpi/pentium4/README new file mode 100644 index 0000000000..215fc7f8bf --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/README @@ -0,0 +1,115 @@ +Copyright 2001 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. + + + + + INTEL PENTIUM-4 MPN SUBROUTINES + + +This directory contains mpn functions optimized for Intel Pentium-4. + +The mmx subdirectory has routines using MMX instructions, the sse2 +subdirectory has routines using SSE2 instructions. All P4s have these, the +separate directories are just so configure can omit that code if the +assembler doesn't support it. + + +STATUS + + cycles/limb + + mpn_add_n/sub_n 4 normal, 6 in-place + + mpn_mul_1 4 normal, 6 in-place + mpn_addmul_1 6 + mpn_submul_1 7 + + mpn_mul_basecase 6 cycles/crossproduct (approx) + + mpn_sqr_basecase 3.5 cycles/crossproduct (approx) + or 7.0 cycles/triangleproduct (approx) + + mpn_l/rshift 1.75 + + + +The shifts ought to be able to go at 1.5 c/l, but not much effort has been +applied to them yet. 
+ +In-place operations, and all addmul, submul, mul_basecase and sqr_basecase +calls, suffer from pipeline anomalies associated with write combining and +movd reads and writes to the same or nearby locations. The movq +instructions do not trigger the same hardware problems. Unfortunately, +using movq and splitting/combining seems to require too many extra +instructions to help. Perhaps future chip steppings will be better. + + + +NOTES + +The Pentium-4 pipeline, "Netburst", provides for quite a number of surprises. +Many traditional x86 instructions run very slowly, requiring use of +alternative instructions for acceptable performance. + +adcl and sbbl are quite slow at 8 cycles for reg->reg. paddq of 32-bits +within a 64-bit mmx register seems better, though the combination +paddq/psrlq when propagating a carry is still a 4 cycle latency. + +incl and decl should be avoided, instead use add $1 and sub $1. Apparently +the carry flag is not separately renamed, so incl and decl depend on all +previous flags-setting instructions. + +shll and shrl have a 4 cycle latency, or 8 times the latency of the fastest +integer instructions (addl, subl, orl, andl, and some more). shldl and +shrdl seem to have 13 and 15 cycles latency, respectively. Bizarre. + +movq mmx -> mmx does have 6 cycle latency, as noted in the documentation. +pxor/por or similar combination at 2 cycles latency can be used instead. +The movq however executes in the float unit, thereby saving MMX execution +resources. With the right juggling, data moves shouldn't be on a dependent +chain. + +L1 is write-through, but the write-combining sounds like it does enough to +not require explicit destination prefetching. + +xmm registers so far haven't found a use, but not much effort has been +expended. A configure test for whether the operating system knows +fxsave/fxrstor will be needed if they're used.
+ + + +REFERENCES + +Intel Pentium-4 processor manuals, + + http://developer.intel.com/design/pentium4/manuals + +"Intel Pentium 4 Processor Optimization Reference Manual", Intel, 2001, +order number 248966. Available on-line: + + http://developer.intel.com/design/pentium4/manuals/248966.htm + + + +---------------- +Local variables: +mode: text +fill-column: 76 +End: diff --git a/comm/third_party/libgcrypt/mpi/pentium4/distfiles b/comm/third_party/libgcrypt/mpi/pentium4/distfiles new file mode 100644 index 0000000000..b419f85a9a --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/distfiles @@ -0,0 +1,3 @@ +README + + diff --git a/comm/third_party/libgcrypt/mpi/pentium4/mmx/distfiles b/comm/third_party/libgcrypt/mpi/pentium4/mmx/distfiles new file mode 100644 index 0000000000..8f0ea426db --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/mmx/distfiles @@ -0,0 +1,2 @@ +mpih-lshift.S +mpih-rshift.S diff --git a/comm/third_party/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S new file mode 100644 index 0000000000..e2dd184ba3 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S @@ -0,0 +1,457 @@ +/* Intel Pentium-4 mpn_lshift -- left shift. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * + * P4 Willamette, Northwood: 1.75 cycles/limb + * P4 Prescott: 2.0 cycles/limb + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift:) + + + pushl %ebx + pushl %edi + + + movl 20(%esp), %eax + movl 12(%esp), %edx + + movl 16(%esp), %ebx + movl 24(%esp), %ecx + + cmp $5, %eax + jae .Lunroll + + movl -4(%ebx,%eax,4), %edi + decl %eax + + jnz .Lsimple + + shldl %cl, %edi, %eax + + shll %cl, %edi + + movl %edi, (%edx) + popl %edi + + popl %ebx + + ret + + + + + +.Lsimple: + + + + + + + + + + movd (%ebx,%eax,4), %mm5 + + movd %ecx, %mm6 + negl %ecx + + psllq %mm6, %mm5 + addl $32, %ecx + + movd %ecx, %mm7 + psrlq $32, %mm5 + + +.Lsimple_top: + + + + + + + + + + + + + movq -4(%ebx,%eax,4), %mm0 + decl %eax + + psrlq %mm7, %mm0 + + + + movd %mm0, 4(%edx,%eax,4) + jnz .Lsimple_top + + + movd (%ebx), %mm0 + + movd %mm5, %eax + psllq %mm6, %mm0 + + popl %edi + popl %ebx + + movd %mm0, (%edx) + + emms + + ret + + + + + + .align 8, 0x90 +.Lunroll: + + + + + + + + + + movd -4(%ebx,%eax,4), %mm5 + leal (%ebx,%eax,4), %edi + + movd %ecx, %mm6 + andl $4, %edi + + psllq %mm6, %mm5 + jz .Lstart_src_aligned + + + + + + + + + + + + + + + + + + + + movq -8(%ebx,%eax,4), %mm0 + + psllq 
%mm6, %mm0 + decl %eax + + psrlq $32, %mm0 + + + + movd %mm0, (%edx,%eax,4) +.Lstart_src_aligned: + + movq -8(%ebx,%eax,4), %mm1 + leal (%edx,%eax,4), %edi + + andl $4, %edi + psrlq $32, %mm5 + + movq -16(%ebx,%eax,4), %mm3 + jz .Lstart_dst_aligned + + + + + + + + + + + + + + + + + + + + + movq %mm1, %mm0 + addl $32, %ecx + + psllq %mm6, %mm0 + + movd %ecx, %mm6 + psrlq $32, %mm0 + + + + movd %mm0, -4(%edx,%eax,4) + subl $4, %edx +.Lstart_dst_aligned: + + + psllq %mm6, %mm1 + negl %ecx + + addl $64, %ecx + movq %mm3, %mm2 + + movd %ecx, %mm7 + subl $8, %eax + + psrlq %mm7, %mm3 + + por %mm1, %mm3 + jc .Lfinish + + + + + .align 8, 0x90 +.Lunroll_loop: + + + + + + + + + + + + + + + + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) + por %mm2, %mm0 + + movq (%ebx,%eax,4), %mm3 + psllq %mm6, %mm1 + + movq %mm0, 16(%edx,%eax,4) + movq %mm3, %mm2 + + psrlq %mm7, %mm3 + subl $4, %eax + + por %mm1, %mm3 + jnc .Lunroll_loop + + + +.Lfinish: + + + testb $2, %al + + jz .Lfinish_no_two + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + subl $2, %eax +.Lfinish_no_two: + + + + + + + + testb $1, %al + movd %mm5, %eax + + popl %edi + jz .Lfinish_zero + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movd (%ebx), %mm0 + psllq %mm6, %mm2 + + movq %mm3, 12(%edx) + psllq $32, %mm0 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm2, %mm0 + psllq %mm6, %mm1 + + movq %mm0, 4(%edx) + psrlq $32, %mm1 + + andl $32, %ecx + popl %ebx + + jz .Lfinish_one_unaligned + + movd %mm1, (%edx) +.Lfinish_one_unaligned: + + emms + + ret + + + + +.Lfinish_zero: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movq %mm3, 8(%edx) + andl $32, %ecx + + psllq %mm6, %mm2 + jz .Lfinish_zero_unaligned + + movq %mm2, (%edx) +.Lfinish_zero_unaligned: + + psrlq 
$32, %mm2 + popl %ebx + + movd %mm5, %eax + + movd %mm2, 4(%edx) + + emms + + ret diff --git a/comm/third_party/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S new file mode 100644 index 0000000000..e3374e3ba3 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S @@ -0,0 +1,453 @@ +/* Intel Pentium-4 mpn_rshift -- right shift. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * + * P4 Willamette, Northwood: 1.75 cycles/limb + * P4 Prescott: 2.0 cycles/limb + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_rshift) +C_SYMBOL_NAME(_gcry_mpih_rshift:) + pushl %ebx + pushl %edi + + + movl 20(%esp), %eax + movl 12(%esp), %edx + + movl 16(%esp), %ebx + movl 24(%esp), %ecx + + cmp $5, %eax + jae .Lunroll + + decl %eax + movl (%ebx), %edi + + jnz .Lsimple + + shrdl %cl, %edi, %eax + + shrl %cl, %edi + + movl %edi, (%edx) + popl %edi + + popl %ebx + + ret + + + + + + .align 8, 0x90 +.Lsimple: + + + + + + + + + + movd (%ebx), %mm5 + leal (%ebx,%eax,4), %ebx + + movd %ecx, %mm6 + leal -4(%edx,%eax,4), %edx + + psllq $32, %mm5 + negl %eax + + + + + + + +.Lsimple_top: + + + + + + + + + + movq (%ebx,%eax,4), %mm0 + incl %eax + + psrlq %mm6, %mm0 + + movd %mm0, (%edx,%eax,4) + jnz .Lsimple_top + + + movd (%ebx), %mm0 + psrlq %mm6, %mm5 + + psrlq %mm6, %mm0 + popl %edi + + movd %mm5, %eax + popl %ebx + + movd %mm0, 4(%edx) + + emms + + ret + + + + + + .align 8, 0x90 +.Lunroll: + + + + + + + + + + movd (%ebx), %mm5 + movl $4, %edi + + movd %ecx, %mm6 + testl %edi, %ebx + + psllq $32, %mm5 + jz .Lstart_src_aligned + + + + + + + + + + + + + + + + + movq (%ebx), %mm0 + + psrlq %mm6, %mm0 + addl $4, %ebx + + decl %eax + + movd %mm0, (%edx) + addl $4, %edx +.Lstart_src_aligned: + + + movq (%ebx), %mm1 + testl %edi, %edx + + psrlq %mm6, %mm5 + jz .Lstart_dst_aligned + + + + + + + + + + + + + + + + + + movq %mm1, %mm0 + addl $32, %ecx + + psrlq %mm6, %mm0 + + movd %ecx, %mm6 + + movd %mm0, (%edx) + addl $4, %edx +.Lstart_dst_aligned: + + + movq 8(%ebx), %mm3 + negl %ecx + + movq %mm3, %mm2 + addl $64, %ecx + + movd %ecx, %mm7 + psrlq %mm6, %mm1 + + leal -12(%ebx,%eax,4), %ebx + leal -20(%edx,%eax,4), %edx + + psllq %mm7, 
%mm3 + subl $7, %eax + + por %mm1, %mm3 + negl %eax + + jns .Lfinish + + + + + + + + + + + + + + + + .align 8, 0x90 +.Lunroll_loop: + + + + + + + + + + + + + + + + + movq (%ebx,%eax,4), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, -8(%edx,%eax,4) + por %mm2, %mm0 + + movq 8(%ebx,%eax,4), %mm3 + psrlq %mm6, %mm1 + + movq %mm0, (%edx,%eax,4) + movq %mm3, %mm2 + + psllq %mm7, %mm3 + addl $4, %eax + + por %mm1, %mm3 + js .Lunroll_loop + + +.Lfinish: + + + testb $2, %al + + jnz .Lfinish_no_two + + movq (%ebx,%eax,4), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, -8(%edx,%eax,4) + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + addl $2, %eax +.Lfinish_no_two: + + + + + + + + testb $1, %al + popl %edi + + movd %mm5, %eax + jnz .Lfinish_zero + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movd 8(%ebx), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, (%edx) + por %mm2, %mm0 + + psrlq %mm6, %mm1 + andl $32, %ecx + + popl %ebx + jz .Lfinish_one_unaligned + + + movd %mm1, 16(%edx) +.Lfinish_one_unaligned: + + movq %mm0, 8(%edx) + + emms + + ret + + + + +.Lfinish_zero: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movq %mm3, 4(%edx) + psrlq %mm6, %mm2 + + movd %mm2, 12(%edx) + andl $32, %ecx + + popl %ebx + jz .Lfinish_zero_unaligned + + movq %mm2, 12(%edx) +.Lfinish_zero_unaligned: + + emms + + ret diff --git a/comm/third_party/libgcrypt/mpi/pentium4/sse2/distfiles b/comm/third_party/libgcrypt/mpi/pentium4/sse2/distfiles new file mode 100644 index 0000000000..7252cd7e3f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/sse2/distfiles @@ -0,0 +1,5 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-sub1.S diff --git a/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-add1.S b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-add1.S new file mode 100644 index 0000000000..55ed663032 --- /dev/null 
+++ b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-add1.S @@ -0,0 +1,91 @@ +/* Intel Pentium-4 mpn_add_n -- mpn addition. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + + /******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + * + * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 + * 6.0 cycles/limb if dst==src1 or dst==src2 + * P4 Prescott: >= 5 cycles/limb + * + * The 4 c/l achieved here isn't particularly good, but is better than 9 c/l + * for a basic adc loop. 
+ */ + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_add_n) +C_SYMBOL_NAME(_gcry_mpih_add_n:) + + pxor %mm0, %mm0 /* carry in = 0 */ + + movl 8(%esp), %eax /* s1_ptr */ + movl %ebx, 8(%esp) /* re-use parameter space */ + movl 12(%esp), %ebx /* s2_ptr */ + movl 4(%esp), %edx /* res_ptr */ + movl 16(%esp), %ecx /* size */ + + leal (%eax,%ecx,4), %eax /* src1 end */ + leal (%ebx,%ecx,4), %ebx /* src2 end */ + leal (%edx,%ecx,4), %edx /* dst end */ + negl %ecx /* -size */ + +Ltop: +/* + C eax src1 end + C ebx src2 end + C ecx counter, limbs, negative + C edx dst end + C mm0 carry bit +*/ + + movd (%eax,%ecx,4), %mm1 + movd (%ebx,%ecx,4), %mm2 + paddq %mm2, %mm1 /* 64-bit sum of the two limbs */ + + paddq %mm1, %mm0 /* plus carry from the previous limb */ + movd %mm0, (%edx,%ecx,4) /* store low 32 bits */ + + psrlq $32, %mm0 /* high half is the new carry */ + + addl $1, %ecx + jnz Ltop + + + movd %mm0, %eax /* return final carry */ + movl 8(%esp), %ebx /* restore saved EBX */ + emms + ret diff --git a/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S new file mode 100644 index 0000000000..a0c98fb4dd --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S @@ -0,0 +1,96 @@ +/* Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * + * src != dst src == dst + * P6 model 9 (Banias) ?.? + * P6 model 13 (Dothan) 4.75 4.75 + * P4 model 0 (Willamette) 4.0 6.0 + * P4 model 1 (?) 4.0 6.0 + * P4 model 2 (Northwood) 4.0 6.0 + * P4 model 3 (Prescott) ?.? ?.? + * P4 model 4 (Nocona) ?.? ?.? + * Unfortunately when src==dst the write-combining described in + * pentium4/README takes us up to 6 c/l. 
+ * + */ + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) +C_SYMBOL_NAME(_gcry_mpih_mul_1:); + + pxor %mm0, %mm0 /* carry limb = 0 */ + +.Lstart_1c: + movl 8(%esp), %eax /* s1_ptr */ + movd 16(%esp), %mm7 /* s2_limb */ + movl 4(%esp), %edx /* res_ptr */ + movl 12(%esp), %ecx /* s1_size */ + +.Ltop: + +/* + C eax src, incrementing + C ebx + C ecx counter, size iterations + C edx dst, incrementing + C + C mm0 carry limb + C mm7 multiplier +*/ + + movd (%eax), %mm1 + addl $4, %eax + pmuludq %mm7, %mm1 /* 32x32 -> 64-bit product */ + + paddq %mm1, %mm0 /* add carry limb from previous step */ + movd %mm0, (%edx) /* store low 32 bits */ + addl $4, %edx + + psrlq $32, %mm0 /* high half becomes the next carry */ + + subl $1, %ecx + jnz .Ltop + + + movd %mm0, %eax /* return final carry limb */ + emms + ret + diff --git a/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S new file mode 100644 index 0000000000..f975adfca5 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S @@ -0,0 +1,136 @@ +/* Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * + * P6 model 9 (Banias) ?.? + * P6 model 13 (Dothan) 5.8 + * P4 model 0 (Willamette) 5.5 + * P4 model 1 (?) 5.5 + * P4 model 2 (Northwood) 5.5 + * P4 model 3 (Prescott) 6.0 + * P4 model 4 (Nocona) + * + * Only the carry limb propagation is on the dependent chain, but some other + * Pentium4 pipeline magic brings down performance to 6 cycles/l from the + * ideal 4 cycles/l. + */ + + + TEXT + ALIGN (4) + GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) +C_SYMBOL_NAME(_gcry_mpih_addmul_1:) + + pxor %mm4, %mm4 /* carry = 0 */ +.Lstart_1c: + movl 8(%esp), %eax /* s1_ptr */ + movl 12(%esp), %ecx /* s1_size */ + movl 4(%esp), %edx /* res_ptr */ + movd 16(%esp), %mm7 /* s2_limb */ + +/* + C eax src, incrementing ; 5B + C ecx loop counter, decrementing + C edx dst, incrementing + C + C mm4 carry, low 32-bits + C mm7 multiplier +*/ + + movd (%eax), %mm2 + pmuludq %mm7, %mm2 + + shrl $1, %ecx /* ecx = size/2, CF set if size is odd */ + jnc .Leven + + leal 4(%eax), %eax + movd (%edx), %mm1 + paddq %mm2, %mm1 + paddq %mm1, %mm4 + movd %mm4, (%edx) + psrlq $32, %mm4 + + testl %ecx, %ecx + jz .Lrtn + leal 4(%edx), %edx + + movd (%eax), %mm2 + pmuludq %mm7, %mm2 +.Leven: + movd 4(%eax), %mm0 + movd (%edx), %mm1 + pmuludq %mm7, %mm0 + + subl $1, %ecx + jz .Lend +.Lloop: + paddq %mm2, %mm1 + movd 8(%eax), %mm2 + paddq %mm1, %mm4 + movd 4(%edx), %mm3 + pmuludq %mm7, %mm2 + movd %mm4, (%edx) + psrlq $32, %mm4 + + paddq %mm0, %mm3 + movd 12(%eax), %mm0 + paddq %mm3, %mm4 + movd 8(%edx), %mm1 + pmuludq %mm7, %mm0 + movd %mm4, 4(%edx) + psrlq $32, %mm4 + + leal 8(%eax), %eax + leal 8(%edx), %edx + subl $1, %ecx +
jnz .Lloop +.Lend: + paddq %mm2, %mm1 + paddq %mm1, %mm4 + movd 4(%edx), %mm3 + movd %mm4, (%edx) + psrlq $32, %mm4 + paddq %mm0, %mm3 + paddq %mm3, %mm4 + movd %mm4, 4(%edx) + psrlq $32, %mm4 +.Lrtn: + movd %mm4, %eax /* return final carry limb */ + emms + ret diff --git a/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S new file mode 100644 index 0000000000..ebcd2a68ea --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S @@ -0,0 +1,127 @@ +/* Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and + * subtract the result from a second limb vector. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc.
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + * + * P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon + * (stepping 10). + * + * This code is not particularly good at 7 c/l. The dependent chain is only + * 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that + * speed isn't achieved. + * + * The arrangements made here to get a two instruction dependent chain are + * slightly subtle. In the loop the carry (or borrow rather) is a negative + * so that a paddq can be used to give a low limb ready to store, and a high + * limb ready to become the new carry after a psrlq. + * + * If the carry was a simple twos complement negative then the psrlq shift + * would need to bring in 0 bits or 1 bits according to whether the high was + * zero or non-zero, since a non-zero value would represent a negative + * needing sign extension. That wouldn't be particularly easy to arrange and + * certainly would add an instruction to the dependent chain, so instead an + * offset is applied so that the high limb will be 0xFFFFFFFF+c. With c in + * the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to + * 0xFFFFFFFF and is therefore always positive and can always have 0 bits + * shifted in, which is what psrlq does. + * + * The extra 0xFFFFFFFF must be subtracted before c is used, but that can be + * done off the dependent chain. The total adjustment then is to add + * 0xFFFFFFFF00000000 to offset the new carry, and subtract + * 0x00000000FFFFFFFF to remove the offset from the current carry, for a net + * add of 0xFFFFFFFE00000001. In the code this is applied to the destination + * limb when fetched. 
+ * + * It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement + * negative, which is how it's undone for the return value, but that doesn't + * seem as clear. +*/ + + TEXT + ALIGN (4) + GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) +C_SYMBOL_NAME(_gcry_mpih_submul_1:) + + pxor %mm1, %mm1 /* initial borrow = 0 */ + +.Lstart_1c: + movl 8(%esp), %eax /* s1_ptr */ + pcmpeqd %mm0, %mm0 /* mm0 = all ones */ + + movd 16(%esp), %mm7 /* s2_limb */ + pcmpeqd %mm6, %mm6 /* mm6 = all ones */ + + movl 4(%esp), %edx /* res_ptr */ + psrlq $32, %mm0 /* mm0 = 0x00000000FFFFFFFF */ + + movl 12(%esp), %ecx /* s1_size */ + psllq $32, %mm6 /* mm6 = 0xFFFFFFFF00000000 */ + + psubq %mm0, %mm6 /* mm6 = 0xFFFFFFFE00000001 */ + + psubq %mm1, %mm0 /* mm0 = 0xFFFFFFFF - initial borrow */ + +/* + C eax src, incrementing + C ebx + C ecx loop counter, decrementing + C edx dst, incrementing + C + C mm0 0xFFFFFFFF - borrow + C mm6 0xFFFFFFFE00000001 + C mm7 multiplier +*/ + +.Lloop: + movd (%eax), %mm1 + leal 4(%eax), %eax + movd (%edx), %mm2 + paddq %mm6, %mm2 /* apply the net offset to the dst limb */ + pmuludq %mm7, %mm1 + psubq %mm1, %mm2 /* dst - src*multiplier */ + paddq %mm2, %mm0 /* fold in the offset borrow */ + subl $1, %ecx /* sets ZF for the jnz below */ + movd %mm0, (%edx) /* store low 32 bits */ + psrlq $32, %mm0 /* high half = 0xFFFFFFFF - new borrow */ + leal 4(%edx), %edx + jnz .Lloop + + movd %mm0, %eax + notl %eax /* undo the 0xFFFFFFFF offset to return the borrow */ + emms + ret diff --git a/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S new file mode 100644 index 0000000000..33900c742e --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S @@ -0,0 +1,112 @@ +/* Intel Pentium-4 mpn_sub_n -- mpn subtraction. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + * + * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 + * 6.0 cycles/limb if dst==src1 or dst==src2 + * P4 Prescott: >= 5 cycles/limb + * + * The main loop code is 2x unrolled so that the carry bit can alternate + * between mm0 and mm1. 
+ */ + + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) +C_SYMBOL_NAME(_gcry_mpih_sub_n:) + + pxor %mm0, %mm0 +.Lstart_nc: + movl 8(%esp), %eax + movl %ebx, 8(%esp) + movl 12(%esp), %ebx + movl 4(%esp), %edx + movl 16(%esp), %ecx + + leal (%eax,%ecx,4), %eax + leal (%ebx,%ecx,4), %ebx + leal (%edx,%ecx,4), %edx + negl %ecx + +.Ltop: +/* + C eax src1 end + C ebx src2 end + C ecx counter, limbs, negative + C edx dst end + C mm0 carry bit +*/ + + movd (%eax,%ecx,4), %mm1 + movd (%ebx,%ecx,4), %mm2 + psubq %mm2, %mm1 + + psubq %mm0, %mm1 + movd %mm1, (%edx,%ecx,4) + + psrlq $63, %mm1 + + addl $1, %ecx + jz .Ldone_mm1 + + movd (%eax,%ecx,4), %mm0 + movd (%ebx,%ecx,4), %mm2 + psubq %mm2, %mm0 + + psubq %mm1, %mm0 + movd %mm0, (%edx,%ecx,4) + + psrlq $63, %mm0 + + addl $1, %ecx + jnz .Ltop + + + movd %mm0, %eax + movl 8(%esp), %ebx + emms + ret + + + +.Ldone_mm1: + movd %mm1, %eax + movl 8(%esp), %ebx + emms + ret diff --git a/comm/third_party/libgcrypt/mpi/power/distfiles b/comm/third_party/libgcrypt/mpi/power/distfiles new file mode 100644 index 0000000000..e664c8db6a --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/distfiles @@ -0,0 +1,7 @@ +mpih-add1.S +mpih-lshift.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-rshift.S +mpih-sub1.S diff --git a/comm/third_party/libgcrypt/mpi/power/mpih-add1.S b/comm/third_party/libgcrypt/mpi/power/mpih-add1.S new file mode 100644 index 0000000000..876b56c664 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-add1.S @@ -0,0 +1,87 @@ +/* IBM POWER add_n -- Add two limb vectors of equal, non-zero length. + * + * Copyright (C) 1992, 1994, 1996, 1999, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + */ + + .toc + .extern _gcry_mpih_add_n[DS] + .extern ._gcry_mpih_add_n +.csect [PR] + .align 2 + .globl _gcry_mpih_add_n + .globl ._gcry_mpih_add_n + .csect _gcry_mpih_add_n[DS] +_gcry_mpih_add_n: + .long ._gcry_mpih_add_n, TOC[tc0], 0 + .csect [PR] +._gcry_mpih_add_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + a 7,0,8 # add least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) + aze 3,10 # use the fact that r10 is zero... + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. 
+L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + ae 7,0,8 # add limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + ae 11,9,10 # add previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + ae 7,0,8 # add previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: ae 11,9,10 # add limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + lil 3,0 # load cy into ... + aze 3,3 # ... return value register + br + diff --git a/comm/third_party/libgcrypt/mpi/power/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/power/mpih-lshift.S new file mode 100644 index 0000000000..d9e42daf81 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-lshift.S @@ -0,0 +1,64 @@ +/* IBM POWER lshift + * + * Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + */ + + .toc + .extern _gcry_mpih_lshift[DS] + .extern ._gcry_mpih_lshift +.csect [PR] + .align 2 + .globl _gcry_mpih_lshift + .globl ._gcry_mpih_lshift + .csect _gcry_mpih_lshift[DS] +_gcry_mpih_lshift: + .long ._gcry_mpih_lshift, TOC[tc0], 0 + .csect [PR] +._gcry_mpih_lshift: + sli 0,5,2 + cax 9,3,0 + cax 4,4,0 + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + lu 0,-4(4) # read most significant limb + sre 3,0,8 # compute carry out limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,-4(4) # read 2:nd most significant limb + sreq 7,0,8 # compute most significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,-4(4) # load next lower limb + stu 7,-4(9) # store previous result during read latency + sreq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,-4(9) # store 2:nd least significant limb +Lend2: sle 7,0,6 # compute least significant limb + st 7,-4(9) # store it + br + diff --git a/comm/third_party/libgcrypt/mpi/power/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/power/mpih-mul1.S new file mode 100644 index 0000000000..35034fa408 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-mul1.S @@ -0,0 +1,115 @@ +/* IBM POWER mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. 
+ */ + + .toc + .csect ._gcry_mpih_mul_1[PR] + .align 2 + .globl _gcry_mpih_mul_1 + .globl ._gcry_mpih_mul_1 + .csect _gcry_mpih_mul_1[DS] +_gcry_mpih_mul_1: + .long ._gcry_mpih_mul_1[PR], TOC[tc0], 0 + .csect ._gcry_mpih_mul_1[PR] +._gcry_mpih_mul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + ai 0,0,0 # reset carry + cax 9,9,7 + blt Lneg +Lpos: bdz Lend +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + cax 10,10,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,9 + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + cax 9,9,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,10 + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br + diff --git a/comm/third_party/libgcrypt/mpi/power/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/power/mpih-mul2.S new file mode 100644 index 0000000000..d056e8f3c2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-mul2.S @@ -0,0 +1,130 @@ +/* IBM POWER addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. 
+ */ + + .toc + .csect ._gcry_mpih_addmul_1[PR] + .align 2 + .globl _gcry_mpih_addmul_1 + .globl ._gcry_mpih_addmul_1 + .csect _gcry_mpih_addmul_1[DS] +_gcry_mpih_addmul_1: + .long ._gcry_mpih_addmul_1[PR], TOC[tc0], 0 + .csect ._gcry_mpih_addmul_1[PR] +._gcry_mpih_addmul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + cax 9,9,7 + l 7,4(3) + a 8,8,7 # add res_limb + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + l 7,4(3) + aze 9,9 + a 8,8,7 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 8,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 8,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br + diff --git a/comm/third_party/libgcrypt/mpi/power/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/power/mpih-mul3.S new file mode 100644 index 0000000000..8bc317b763 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-mul3.S @@ -0,0 +1,135 @@ +/* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1994, 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +/* + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. 
+ */ + + .toc + .csect ._gcry_mpih_submul_1[PR] + .align 2 + .globl _gcry_mpih_submul_1 + .globl ._gcry_mpih_submul_1 + .csect _gcry_mpih_submul_1[DS] +_gcry_mpih_submul_1: + .long ._gcry_mpih_submul_1[PR], TOC[tc0], 0 + .csect ._gcry_mpih_submul_1[PR] +._gcry_mpih_submul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 11 + cax 9,9,7 + l 7,4(3) + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 11,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 11,0,10 + l 7,4(3) + aze 9,9 + sf 8,11,7 + a 11,8,11 # invert cy (r11 is junk) + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 11,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 11,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + sf 8,11,7 # subtract from res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br + diff --git a/comm/third_party/libgcrypt/mpi/power/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/power/mpih-rshift.S new file mode 100644 index 0000000000..f131a86d7b --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-rshift.S @@ -0,0 +1,64 @@ +/* IBM POWER rshift + * + * Copyright (C) 1992, 
1994, 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 +*/ + + .toc + .extern _gcry_mpih_rshift[DS] + .extern ._gcry_mpih_rshift +.csect [PR] + .align 2 + .globl _gcry_mpih_rshift + .globl ._gcry_mpih_rshift + .csect _gcry_mpih_rshift[DS] +_gcry_mpih_rshift: + .long ._gcry_mpih_rshift, TOC[tc0], 0 + .csect [PR] +._gcry_mpih_rshift: + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + l 0,0(4) # read least significant limb + ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s + sle 3,0,8 # compute carry limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,4(4) # read 2:nd least significant limb + sleq 7,0,8 # compute least significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,4(4) # load next higher limb + stu 7,4(9) # store previous result during read latency + sleq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,4(9) # store 2:nd most significant limb +Lend2: sre 7,0,6 # compute most significant limb + st 7,4(9) # store it + br + + diff --git 
a/comm/third_party/libgcrypt/mpi/power/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/power/mpih-sub1.S new file mode 100644 index 0000000000..02748fc556 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/power/mpih-sub1.S @@ -0,0 +1,88 @@ +/* IBM POWER sub_n -- Subtract two limb vectors of equal, non-zero length. + * + * Copyright (C) 1992, 1994, 1995, 1996, 1999, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + */ + + .toc + .extern _gcry_mpih_sub_n[DS] + .extern ._gcry_mpih_sub_n +.csect [PR] + .align 2 + .globl _gcry_mpih_sub_n + .globl ._gcry_mpih_sub_n + .csect _gcry_mpih_sub_n[DS] +_gcry_mpih_sub_n: + .long ._gcry_mpih_sub_n, TOC[tc0], 0 + .csect [PR] +._gcry_mpih_sub_n: + andil. 10,6,1 # odd or even number of limbs? 
+ l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + sf 7,0,8 # subtract least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Subtract the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br # return + +# We subtracted the least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + sfe 7,0,8 # subtract limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + sfe 11,10,9 # subtract previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + sfe 7,0,8 # subtract previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: sfe 11,10,9 # subtract limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... 
return value register + br + diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/distfiles b/comm/third_party/libgcrypt/mpi/powerpc32/distfiles new file mode 100644 index 0000000000..af10d795b0 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/distfiles @@ -0,0 +1,9 @@ +mpih-add1.S +mpih-sub1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-lshift.S +mpih-rshift.S +syntax.h + diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-add1.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-add1.S new file mode 100644 index 0000000000..1661f5e679 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-add1.S @@ -0,0 +1,136 @@ +/* PowerPC-32 add_n -- Add two limb vectors of equal, non-zero length. + * + * Copyright (C) 1992, 1994, 1995, 1998, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (r3) + * mpi_ptr_t s1_ptr, (r4) + * mpi_ptr_t s2_ptr, (r5) + * mpi_size_t size) (r6) + */ + + .toc + .extern _gcry_mpih_add_n[DS] + .extern ._gcry_mpih_add_n +.csect [PR] + .align 2 + .globl _gcry_mpih_add_n + .globl ._gcry_mpih_add_n + .csect _gcry_mpih_add_n[DS] +_gcry_mpih_add_n: + .long ._gcry_mpih_add_n, TOC[tc0], 0 + .csect [PR] +._gcry_mpih_add_n: + mtctr 6 # copy size into CTR + lwz 8,0(4) # load least significant s1 limb + lwz 0,0(5) # load least significant s2 limb + addi 3,3,-4 # offset res_ptr, it is updated before used + addc 7,0,8 # add least significant limbs, set cy + bdz Lend # If done, skip loop +Loop: lwzu 8,4(4) # load s1 limb and update s1_ptr + lwzu 0,4(5) # load s2 limb and update s2_ptr + stwu 7,4(3) # store previous limb in load latency slot + adde 7,0,8 # add new limbs with cy, set cy + bdnz Loop # decrement CTR and loop back +Lend: stw 7,4(3) # store ultimate result limb + li 3,0 # load cy into ... + addze 3,3 # ... return value register + blr + +#else +/* Add two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +/* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN(_gcry_mpih_add_n,3,0) +/* Set up for loop below. */ + mtcrf 0x01,%r6 + srwi. %r7,%r6,1 + li %r10,0 + mtctr %r7 + bt 31,2f + +/* Clear the carry. */ + addic %r0,%r0,0 +/* Adjust pointers for loop. */ + addi %r3,%r3,-4 + addi %r4,%r4,-4 + addi %r5,%r5,-4 + b 0f + +2: lwz %r7,0(%r5) + lwz %r6,0(%r4) + addc %r6,%r6,%r7 + stw %r6,0(%r3) + beq 1f + +/* The loop. */ + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). */ +0: lwz %r9,4(%r4) + lwz %r8,4(%r5) + lwzu %r6,8(%r4) + lwzu %r7,8(%r5) + adde %r8,%r9,%r8 + stw %r8,4(%r3) + adde %r6,%r6,%r7 + stwu %r6,8(%r3) + bdnz 0b +/* Return the carry. */ +1: addze %r3,%r10 + blr +END(_gcry_mpih_add_n) +#endif + diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-lshift.S new file mode 100644 index 0000000000..6231095dc2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-lshift.S @@ -0,0 +1,198 @@ +/* PowerPC-32 lshift + * + * Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (r3) + * mpi_ptr_t up, (r4) + * mpi_size_t usize, (r5) + * unsigned cnt) (r6) + */ + + .toc +.csect .text[PR] + .align 2 + .globl _gcry_mpih_lshift + .globl ._gcry_mpih_lshift + .csect _gcry_mpih_lshift[DS] +_gcry_mpih_lshift: + .long ._gcry_mpih_lshift, TOC[tc0], 0 + .csect .text[PR] +._gcry_mpih_lshift: + mtctr 5 # copy size into CTR + slwi 0,5,2 + add 7,3,0 # make r7 point at end of res + add 4,4,0 # make r4 point at end of s1 + subfic 8,6,32 + lwzu 11,-4(4) # load first s1 limb + srw 3,11,8 # compute function return value + bdz Lend1 + +Loop: lwzu 10,-4(4) + slw 9,11,6 + srw 12,10,8 + or 9,9,12 + stwu 9,-4(7) + bdz Lend2 + lwzu 11,-4(4) + slw 9,10,6 + srw 12,11,8 + or 9,9,12 + stwu 9,-4(7) + bdnz Loop + +Lend1: slw 0,11,6 + stw 0,-4(7) + blr + +Lend2: slw 0,10,6 + stw 0,-4(7) + blr + +#else +/* Shift a limb left, low level routine. + Copyright (C) 1996, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize, + unsigned int cnt) */ + +EALIGN(_gcry_mpih_lshift,3,0) + mtctr %r5 # copy size into CTR + cmplwi %cr0,%r5,16 # is size < 16 + slwi %r0,%r5,2 + add %r7,%r3,%r0 # make r7 point at end of res + add %r4,%r4,%r0 # make r4 point at end of s1 + lwzu %r11,-4(%r4) # load first s1 limb + subfic %r8,%r6,32 + srw %r3,%r11,%r8 # compute function return value + bge %cr0,L(big) # branch if size >= 16 + + bdz L(end1) + +0: lwzu %r10,-4(%r4) + slw %r9,%r11,%r6 + srw %r12,%r10,%r8 + or %r9,%r9,%r12 + stwu %r9,-4(%r7) + bdz L(end2) + lwzu %r11,-4(%r4) + slw %r9,%r10,%r6 + srw %r12,%r11,%r8 + or %r9,%r9,%r12 + stwu %r9,-4(%r7) + bdnz 0b + +L(end1):slw %r0,%r11,%r6 + stw %r0,-4(%r7) + blr + + +/* Guaranteed not to succeed. */ +L(boom): tweq %r0,%r0 + +/* We imitate a case statement, by using (yuk!) fixed-length code chunks, + of size 4*12 bytes. We have to do this (or something) to make this PIC. */ +L(big): mflr %r9 + bltl- %cr0,L(boom) # Never taken, only used to set LR. 
+ slwi %r10,%r6,4 + mflr %r12 + add %r10,%r12,%r10 + slwi %r8,%r6,5 + add %r10,%r8,%r10 + mtctr %r10 + addi %r5,%r5,-1 + mtlr %r9 + bctr + +L(end2):slw %r0,%r10,%r6 + stw %r0,-4(%r7) + blr + +#define DO_LSHIFT(n) \ + mtctr %r5; \ +0: lwzu %r10,-4(%r4); \ + slwi %r9,%r11,n; \ + inslwi %r9,%r10,n,32-n; \ + stwu %r9,-4(%r7); \ + bdz- L(end2); \ + lwzu %r11,-4(%r4); \ + slwi %r9,%r10,n; \ + inslwi %r9,%r11,n,32-n; \ + stwu %r9,-4(%r7); \ + bdnz 0b; \ + b L(end1) + + DO_LSHIFT(1) + DO_LSHIFT(2) + DO_LSHIFT(3) + DO_LSHIFT(4) + DO_LSHIFT(5) + DO_LSHIFT(6) + DO_LSHIFT(7) + DO_LSHIFT(8) + DO_LSHIFT(9) + DO_LSHIFT(10) + DO_LSHIFT(11) + DO_LSHIFT(12) + DO_LSHIFT(13) + DO_LSHIFT(14) + DO_LSHIFT(15) + DO_LSHIFT(16) + DO_LSHIFT(17) + DO_LSHIFT(18) + DO_LSHIFT(19) + DO_LSHIFT(20) + DO_LSHIFT(21) + DO_LSHIFT(22) + DO_LSHIFT(23) + DO_LSHIFT(24) + DO_LSHIFT(25) + DO_LSHIFT(26) + DO_LSHIFT(27) + DO_LSHIFT(28) + DO_LSHIFT(29) + DO_LSHIFT(30) + DO_LSHIFT(31) + +END(_gcry_mpih_lshift) +#endif diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul1.S new file mode 100644 index 0000000000..bd418f7e3a --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul1.S @@ -0,0 +1,120 @@ +/* PowerPC-32 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1993, 1994, 1995, + * 1998, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (r3) + * mpi_ptr_t s1_ptr, (r4) + * mpi_size_t s1_size, (r5) + * mpi_limb_t s2_limb) (r6) + * + * This is a fairly straightforward implementation. The timing of the PC601 + * is hard to understand, so I will wait to optimize this until I have some + * hardware to play with. + * + * The code trivially generalizes to 64 bit limbs for the PC620. + */ + + .toc + .csect ._gcry_mpih_mul_1[PR] + .align 2 + .globl _gcry_mpih_mul_1 + .globl ._gcry_mpih_mul_1 + .csect _gcry_mpih_mul_1[DS] +_gcry_mpih_mul_1: + .long ._gcry_mpih_mul_1[PR], TOC[tc0], 0 + .csect ._gcry_mpih_mul_1[PR] +._gcry_mpih_mul_1: + mtctr 5 + + lwz 0,0(4) + mullw 7,0,6 + mulhwu 10,0,6 + addi 3,3,-4 # adjust res_ptr + addic 5,5,0 # clear cy with dummy insn + bdz Lend + +Loop: lwzu 0,4(4) + stwu 7,4(3) + mullw 8,0,6 + adde 7,8,10 + mulhwu 10,0,6 + bdnz Loop + +Lend: stw 7,4(3) + addze 3,10 + blr + +#else +/* Multiply a limb vector by a limb, for PowerPC. + Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +/* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate s1*s2 and put result in res_ptr; return carry. */ + +ENTRY(_gcry_mpih_mul_1) + mtctr %r5 + + lwz %r0,0(%r4) + mullw %r7,%r0,%r6 + mulhwu %r10,%r0,%r6 + addi %r3,%r3,-4 # adjust res_ptr + addic %r5,%r5,0 # clear cy with dummy insn + bdz 1f + +0: lwzu %r0,4(%r4) + stwu %r7,4(%r3) + mullw %r8,%r0,%r6 + adde %r7,%r8,%r10 + mulhwu %r10,%r0,%r6 + bdnz 0b + +1: stw %r7,4(%r3) + addze %r3,%r10 + blr +END(_gcry_mpih_mul_1) +#endif diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul2.S new file mode 100644 index 0000000000..1d97b81a4d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul2.S @@ -0,0 +1,127 @@ +/* PowerPC-32 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (r3) + * mpi_ptr_t s1_ptr, (r4) + * mpi_size_t s1_size, (r5) + * mpi_limb_t s2_limb) (r6) + * + * This is a fairly straightforward implementation. The timing of the PC601 + * is hard to understand, so I will wait to optimize this until I have some + * hardware to play with. + * + * The code trivially generalizes to 64 bit limbs for the PC620. + */ + + + .toc + .csect ._gcry_mpih_addmul_1[PR] + .align 2 + .globl _gcry_mpih_addmul_1 + .globl ._gcry_mpih_addmul_1 + .csect _gcry_mpih_addmul_1[DS] +_gcry_mpih_addmul_1: + .long ._gcry_mpih_addmul_1[PR], TOC[tc0], 0 + .csect ._gcry_mpih_addmul_1[PR] +._gcry_mpih_addmul_1: + mtctr 5 + + lwz 0,0(4) + mullw 7,0,6 + mulhwu 10,0,6 + lwz 9,0(3) + addc 8,7,9 + addi 3,3,-4 + bdz Lend + +Loop: lwzu 0,4(4) + stwu 8,4(3) + mullw 8,0,6 + adde 7,8,10 + mulhwu 10,0,6 + lwz 9,4(3) + addze 10,10 + addc 8,7,9 + bdnz Loop + +Lend: stw 8,4(3) + addze 3,10 + blr + +#else +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +/* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res+s1*s2 and put result back in res; return carry. */ +ENTRY(_gcry_mpih_addmul_1) + mtctr %r5 + + lwz %r0,0(%r4) + mullw %r7,%r0,%r6 + mulhwu %r10,%r0,%r6 + lwz %r9,0(%r3) + addc %r8,%r7,%r9 + addi %r3,%r3,-4 /* adjust res_ptr */ + bdz 1f + +0: lwzu %r0,4(%r4) + stwu %r8,4(%r3) + mullw %r8,%r0,%r6 + adde %r7,%r8,%r10 + mulhwu %r10,%r0,%r6 + lwz %r9,4(%r3) + addze %r10,%r10 + addc %r8,%r7,%r9 + bdnz 0b + +1: stw %r8,4(%r3) + addze %r3,%r10 + blr +END(_gcry_mpih_addmul_1) +#endif diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul3.S new file mode 100644 index 0000000000..c410dbb02e --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-mul3.S @@ -0,0 +1,130 @@ +/* PowerPC-32 submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (r3) + * mpi_ptr_t s1_ptr, (r4) + * mpi_size_t s1_size, (r5) + * mpi_limb_t s2_limb) (r6) + * + * This is a fairly straightforward implementation. The timing of the PC601 + * is hard to understand, so I will wait to optimize this until I have some + * hardware to play with. + * + * The code trivially generalizes to 64 bit limbs for the PC620. + */ + + .toc + .csect ._gcry_mpih_submul_1[PR] + .align 2 + .globl _gcry_mpih_submul_1 + .globl ._gcry_mpih_submul_1 + .csect _gcry_mpih_submul_1[DS] +_gcry_mpih_submul_1: + .long ._gcry_mpih_submul_1[PR], TOC[tc0], 0 + .csect ._gcry_mpih_submul_1[PR] +._gcry_mpih_submul_1: + mtctr 5 + + lwz 0,0(4) + mullw 7,0,6 + mulhwu 10,0,6 + lwz 9,0(3) + subfc 8,7,9 + addc 7,7,8 # invert cy (r7 is junk) + addi 3,3,-4 + bdz Lend + +Loop: lwzu 0,4(4) + stwu 8,4(3) + mullw 8,0,6 + adde 7,8,10 + mulhwu 10,0,6 + lwz 9,4(3) + addze 10,10 + subfc 8,7,9 + addc 7,7,8 # invert cy (r7 is junk) + bdnz Loop + +Lend: stw 8,4(3) + addze 3,10 + blr + +#else +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res-s1*s2 and put result back in res; return carry. */ + +ENTRY(_gcry_mpih_submul_1) + mtctr %r5 + + lwz %r0,0(%r4) + mullw %r7,%r0,%r6 + mulhwu %r10,%r0,%r6 + lwz %r9,0(%r3) + subf %r8,%r7,%r9 + addc %r7,%r7,%r8 # invert cy (r7 is junk) + addi %r3,%r3,-4 # adjust res_ptr + bdz 1f + +0: lwzu %r0,4(%r4) + stwu %r8,4(%r3) + mullw %r8,%r0,%r6 + adde %r7,%r8,%r10 + mulhwu %r10,%r0,%r6 + lwz %r9,4(%r3) + addze %r10,%r10 + subf %r8,%r7,%r9 + addc %r7,%r7,%r8 # invert cy (r7 is junk) + bdnz 0b + +1: stw %r8,4(%r3) + addze %r3,%r10 + blr +END(_gcry_mpih_submul_1) +#endif diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-rshift.S new file mode 100644 index 0000000000..98349edb5b --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-rshift.S @@ -0,0 +1,131 @@ +/* PowerPC-32 rshift + * + * Copyright (C) 1995, 1998, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (r3) + * mpi_ptr_t up, (r4) + * mpi_size_t usize, (r5) + * unsigned cnt) (r6) + */ + + .toc +.csect .text[PR] + .align 2 + .globl _gcry_mpih_rshift + .globl ._gcry_mpih_rshift + .csect _gcry_mpih_rshift[DS] +_gcry_mpih_rshift: + .long ._gcry_mpih_rshift, TOC[tc0], 0 + .csect .text[PR] +._gcry_mpih_rshift: + mtctr 5 # copy size into CTR + addi 7,3,-4 # move adjusted res_ptr to free return reg + subfic 8,6,32 + lwz 11,0(4) # load first s1 limb + slw 3,11,8 # compute function return value + bdz Lend1 + +Loop: lwzu 10,4(4) + srw 9,11,6 + slw 12,10,8 + or 9,9,12 + stwu 9,4(7) + bdz Lend2 + lwzu 11,4(4) + srw 9,10,6 + slw 12,11,8 + or 9,9,12 + stwu 9,4(7) + bdnz Loop + +Lend1: srw 0,11,6 + stw 0,4(7) + blr + +Lend2: srw 0,10,6 + stw 0,4(7) + blr + +#else +/* Shift a limb right, low level routine. + Copyright (C) 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. 
If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +/* INPUT PARAMETERS + res_ptr r3 + s1_ptr r4 + size r5 + cnt r6 */ + +ENTRY(_gcry_mpih_rshift) + mtctr 5 # copy size into CTR + addi 7,3,-4 # move adjusted res_ptr to free return reg + subfic 8,6,32 + lwz 11,0(4) # load first s1 limb + slw 3,11,8 # compute function return value + bdz 1f + +0: lwzu 10,4(4) + srw 9,11,6 + slw 12,10,8 + or 9,9,12 + stwu 9,4(7) + bdz 2f + lwzu 11,4(4) + srw 9,10,6 + slw 12,11,8 + or 9,9,12 + stwu 9,4(7) + bdnz 0b + +1: srw 0,11,6 + stw 0,4(7) + blr + +2: srw 0,10,6 + stw 0,4(7) + blr +END(_gcry_mpih_rshift) +#endif diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/mpih-sub1.S b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-sub1.S new file mode 100644 index 0000000000..d612ea890a --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/mpih-sub1.S @@ -0,0 +1,133 @@ +/* PowerPC-32 sub_n -- Subtract two limb vectors of the same length > 0 + * and store difference in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +#ifndef USE_PPC_PATCHES + +/******************* + * mpi_limb_t + * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (r3) + * mpi_ptr_t s1_ptr, (r4) + * mpi_ptr_t s2_ptr, (r5) + * mpi_size_t size) (r6) + */ + + .toc + .extern _gcry_mpih_sub_n[DS] + .extern ._gcry_mpih_sub_n +.csect [PR] + .align 2 + .globl _gcry_mpih_sub_n + .globl ._gcry_mpih_sub_n + .csect _gcry_mpih_sub_n[DS] +_gcry_mpih_sub_n: + .long ._gcry_mpih_sub_n, TOC[tc0], 0 + .csect [PR] +._gcry_mpih_sub_n: + mtctr 6 # copy size into CTR + lwz 8,0(4) # load least significant s1 limb + lwz 0,0(5) # load least significant s2 limb + addi 3,3,-4 # offset res_ptr, it is updated before used + subfc 7,0,8 # add least significant limbs, set cy + bdz Lend # If done, skip loop +Loop: lwzu 8,4(4) # load s1 limb and update s1_ptr + lwzu 0,4(5) # load s2 limb and update s2_ptr + stwu 7,4(3) # store previous limb in load latency slot + subfe 7,0,8 # add new limbs with cy, set cy + bdnz Loop # decrement CTR and loop back +Lend: stw 7,4(3) # store ultimate result limb + subfe 3,0,0 # load !cy into ... + subfic 3,3,0 # ... return value register + blr + +#else +/* Subtract two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN(_gcry_mpih_sub_n,3,1) +/* Set up for loop below. */ + mtcrf 0x01,%r6 + srwi. %r7,%r6,1 + mtctr %r7 + bt 31,2f + +/* Set the carry (clear the borrow). */ + subfc %r0,%r0,%r0 +/* Adjust pointers for loop. */ + addi %r3,%r3,-4 + addi %r4,%r4,-4 + addi %r5,%r5,-4 + b 0f + +2: lwz %r7,0(%r5) + lwz %r6,0(%r4) + subfc %r6,%r7,%r6 + stw %r6,0(%r3) + beq 1f + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). This turns + out to be important. */ +0: + lwz %r9,4(%r4) + lwz %r8,4(%r5) + lwzu %r6,8(%r4) + lwzu %r7,8(%r5) + subfe %r8,%r8,%r9 + stw %r8,4(%r3) + subfe %r6,%r7,%r6 + stwu %r6,8(%r3) + bdnz 0b +/* Return the borrow. 
*/ +1: subfe %r3,%r3,%r3 + neg %r3,%r3 + blr +END(_gcry_mpih_sub_n) +#endif diff --git a/comm/third_party/libgcrypt/mpi/powerpc32/syntax.h b/comm/third_party/libgcrypt/mpi/powerpc32/syntax.h new file mode 100644 index 0000000000..5d4af9f0ae --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc32/syntax.h @@ -0,0 +1,75 @@ +/* gmp2-2.0.2-ppc/mpn/powerpc-linux/syntax.h Tue Oct 6 19:27:01 1998 */ +/* From glibc's sysdeps/unix/sysv/linux/powerpc/sysdep.h */ + +/* Copyright (C) 1992, 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +#define USE_PPC_PATCHES 1 + +/* This seems to always be the case on PPC. */ +#define ALIGNARG(log2) log2 +/* For ELF we need the `.type' directive to make shared libs work right. */ +#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg; +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name +#define ASM_GLOBAL_DIRECTIVE .globl + +#ifdef __STDC__ +#define C_LABEL(name) C_SYMBOL_NAME(name)##: +#else +#define C_LABEL(name) C_SYMBOL_NAME(name)/**/: +#endif + +#ifdef __STDC__ +#define L(body) .L##body +#else +#define L(body) .L/**/body +#endif + +/* No profiling of gmp's assembly for now... 
*/ +#define CALL_MCOUNT /* no profiling */ + +#define ENTRY(name) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + CALL_MCOUNT + +#define EALIGN_W_0 /* No words to insert. */ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^align boundary. */ +#define EALIGN(name, alignt, words) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(name) + +#undef END +#define END(name) \ + ASM_SIZE_DIRECTIVE(name) + diff --git a/comm/third_party/libgcrypt/mpi/powerpc64/distfiles b/comm/third_party/libgcrypt/mpi/powerpc64/distfiles new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/powerpc64/distfiles diff --git a/comm/third_party/libgcrypt/mpi/sparc32/distfiles b/comm/third_party/libgcrypt/mpi/sparc32/distfiles new file mode 100644 index 0000000000..51329dbdbe --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32/distfiles @@ -0,0 +1,5 @@ +mpih-lshift.S +mpih-rshift.S +mpih-add1.S +udiv.S + diff --git a/comm/third_party/libgcrypt/mpi/sparc32/mpih-add1.S b/comm/third_party/libgcrypt/mpi/sparc32/mpih-add1.S new file mode 100644 index 0000000000..61a80ca320 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32/mpih-add1.S @@ -0,0 +1,239 @@ +/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1995, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + + +/******************* + * mpi_limb_t + * _gcry_mpih_add_n( mpi_ptr_t res_ptr, + * mpi_ptr_t s1_ptr, + * mpi_ptr_t s2_ptr, + * mpi_size_t size) + */ + +! INPUT PARAMETERS +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define size %o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(_gcry_mpih_add_n) +C_SYMBOL_NAME(_gcry_mpih_add_n): + xor s2_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L1 ! branch if alignment differs + nop +! ** V1a ** +L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 + be L_v1 ! if no, branch + nop +/* Add least significant limb separately to align res_ptr and s2_ptr */ + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add size,-1,size + addcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr +L_v1: addx %g0,%g0,%o4 ! save cy in register + cmp size,2 ! if size < 2 ... + bl Lend2 ! ... branch to tail code + subcc %g0,%o4,%g0 ! restore cy + + ld [s1_ptr+0],%g4 + addcc size,-10,size + ld [s1_ptr+4],%g1 + ldd [s2_ptr+0],%g2 + blt Lfin1 + subcc %g0,%o4,%g0 ! 
restore cy +/* Add blocks of 8 limbs until less than 8 limbs remain */ +Loop1: addxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + addxcc %g4,%g2,%o4 + ld [s1_ptr+16],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+20],%g1 + ldd [s2_ptr+16],%g2 + std %o4,[res_ptr+8] + addxcc %g4,%g2,%o4 + ld [s1_ptr+24],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+28],%g1 + ldd [s2_ptr+24],%g2 + std %o4,[res_ptr+16] + addxcc %g4,%g2,%o4 + ld [s1_ptr+32],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+36],%g1 + ldd [s2_ptr+32],%g2 + std %o4,[res_ptr+24] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-8,size + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge Loop1 + subcc %g0,%o4,%g0 ! restore cy + +Lfin1: addcc size,8-2,size + blt Lend1 + subcc %g0,%o4,%g0 ! restore cy +/* Add blocks of 2 limbs until less than 2 limbs remain */ +Loope1: addxcc %g4,%g2,%o4 + ld [s1_ptr+8],%g4 + addxcc %g1,%g3,%o5 + ld [s1_ptr+12],%g1 + ldd [s2_ptr+8],%g2 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-2,size + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge Loope1 + subcc %g0,%o4,%g0 ! restore cy +Lend1: addxcc %g4,%g2,%o4 + addxcc %g1,%g3,%o5 + std %o4,[res_ptr+0] + addx %g0,%g0,%o4 ! save cy in register + + andcc size,1,%g0 + be Lret1 + subcc %g0,%o4,%g0 ! restore cy +/* Add last limb */ + ld [s1_ptr+8],%g4 + ld [s2_ptr+8],%g2 + addxcc %g4,%g2,%o4 + st %o4,[res_ptr+8] + +Lret1: retl + addx %g0,%g0,%o0 ! return carry-out from most sign. limb + +L1: xor s1_ptr,res_ptr,%g1 + andcc %g1,4,%g0 + bne L2 + nop +! ** V1b ** + mov s2_ptr,%g1 + mov s1_ptr,s2_ptr + b L0 + mov %g1,s1_ptr + +! ** V2 ** +/* If we come here, the alignment of s1_ptr and res_ptr as well as the + alignment of s2_ptr and res_ptr differ. Since there are only two ways + things can be aligned (that we care about) we now know that the alignment + of s1_ptr and s2_ptr are the same. 
*/ + +L2: cmp size,1 + be Ljone + nop + andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 + be L_v2 ! if no, branch + nop +/* Add least significant limb separately to align s1_ptr and s2_ptr */ + ld [s1_ptr],%g4 + add s1_ptr,4,s1_ptr + ld [s2_ptr],%g2 + add s2_ptr,4,s2_ptr + add size,-1,size + addcc %g4,%g2,%o4 + st %o4,[res_ptr] + add res_ptr,4,res_ptr + +L_v2: addx %g0,%g0,%o4 ! save cy in register + addcc size,-8,size + blt Lfin2 + subcc %g0,%o4,%g0 ! restore cy +/* Add blocks of 8 limbs until less than 8 limbs remain */ +Loop2: ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + ldd [s1_ptr+8],%g2 + ldd [s2_ptr+8],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+8] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+12] + ldd [s1_ptr+16],%g2 + ldd [s2_ptr+16],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+16] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+20] + ldd [s1_ptr+24],%g2 + ldd [s2_ptr+24],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+24] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+28] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-8,size + add s1_ptr,32,s1_ptr + add s2_ptr,32,s2_ptr + add res_ptr,32,res_ptr + bge Loop2 + subcc %g0,%o4,%g0 ! restore cy + +Lfin2: addcc size,8-2,size + blt Lend2 + subcc %g0,%o4,%g0 ! restore cy +Loope2: ldd [s1_ptr+0],%g2 + ldd [s2_ptr+0],%o4 + addxcc %g2,%o4,%g2 + st %g2,[res_ptr+0] + addxcc %g3,%o5,%g3 + st %g3,[res_ptr+4] + addx %g0,%g0,%o4 ! save cy in register + addcc size,-2,size + add s1_ptr,8,s1_ptr + add s2_ptr,8,s2_ptr + add res_ptr,8,res_ptr + bge Loope2 + subcc %g0,%o4,%g0 ! restore cy +Lend2: andcc size,1,%g0 + be Lret2 + subcc %g0,%o4,%g0 ! restore cy +/* Add last limb */ +Ljone: ld [s1_ptr],%g4 + ld [s2_ptr],%g2 + addxcc %g4,%g2,%o4 + st %o4,[res_ptr] + +Lret2: retl + addx %g0,%g0,%o0 ! return carry-out from most sign. 
limb + + + diff --git a/comm/third_party/libgcrypt/mpi/sparc32/mpih-lshift.S b/comm/third_party/libgcrypt/mpi/sparc32/mpih-lshift.S new file mode 100644 index 0000000000..3422ab04e5 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32/mpih-lshift.S @@ -0,0 +1,97 @@ +/* sparc lshift + * + * Copyright (C) 1995, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! cnt %o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift): + sll %o2,2,%g1 + add %o1,%g1,%o1 ! make %o1 point at end of src + ld [%o1-4],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o0,%g1,%o0 ! make %o0 point at end of res + add %o2,-1,%o2 + andcc %o2,4-1,%g4 ! number of limbs in first loop + srl %g2,%o5,%g1 ! compute function result + be L0 ! if multiple of 4 limbs, skip first loop + st %g1,[%sp+80] + + sub %o2,%g4,%o2 ! 
adjust count for main loop + +Loop0: ld [%o1-8],%g3 + add %o0,-4,%o0 + add %o1,-4,%o1 + addcc %g4,-1,%g4 + sll %g2,%o3,%o4 + srl %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + bne Loop0 + st %o4,[%o0+0] + +L0: tst %o2 + be Lend + nop + +Loop: ld [%o1-8],%g3 + add %o0,-16,%o0 + addcc %o2,-4,%o2 + sll %g2,%o3,%o4 + srl %g3,%o5,%g1 + + ld [%o1-12],%g2 + sll %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0+12] + srl %g2,%o5,%g1 + + ld [%o1-16],%g3 + sll %g2,%o3,%o4 + or %g4,%g1,%g4 + st %g4,[%o0+8] + srl %g3,%o5,%g1 + + ld [%o1-20],%g2 + sll %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0+4] + srl %g2,%o5,%g1 + + add %o1,-16,%o1 + or %g4,%g1,%g4 + bne Loop + st %g4,[%o0+0] + +Lend: sll %g2,%o3,%g2 + st %g2,[%o0-4] + retl + ld [%sp+80],%o0 + diff --git a/comm/third_party/libgcrypt/mpi/sparc32/mpih-rshift.S b/comm/third_party/libgcrypt/mpi/sparc32/mpih-rshift.S new file mode 100644 index 0000000000..cd3db41df3 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32/mpih-rshift.S @@ -0,0 +1,93 @@ +/* sparc rshift + * + * Copyright (C) 1995, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +! INPUT PARAMETERS +! res_ptr %o0 +! src_ptr %o1 +! size %o2 +! 
cnt %o3 + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(_gcry_mpih_rshift) +C_SYMBOL_NAME(_gcry_mpih_rshift): + ld [%o1],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o2,-1,%o2 + andcc %o2,4-1,%g4 ! number of limbs in first loop + sll %g2,%o5,%g1 ! compute function result + be L0 ! if multiple of 4 limbs, skip first loop + st %g1,[%sp+80] + + sub %o2,%g4,%o2 ! adjust count for main loop + +Loop0: ld [%o1+4],%g3 + add %o0,4,%o0 + add %o1,4,%o1 + addcc %g4,-1,%g4 + srl %g2,%o3,%o4 + sll %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + bne Loop0 + st %o4,[%o0-4] + +L0: tst %o2 + be Lend + nop + +Loop: ld [%o1+4],%g3 + add %o0,16,%o0 + addcc %o2,-4,%o2 + srl %g2,%o3,%o4 + sll %g3,%o5,%g1 + + ld [%o1+8],%g2 + srl %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0-16] + sll %g2,%o5,%g1 + + ld [%o1+12],%g3 + srl %g2,%o3,%o4 + or %g4,%g1,%g4 + st %g4,[%o0-12] + sll %g3,%o5,%g1 + + ld [%o1+16],%g2 + srl %g3,%o3,%g4 + or %o4,%g1,%o4 + st %o4,[%o0-8] + sll %g2,%o5,%g1 + + add %o1,16,%o1 + or %g4,%g1,%g4 + bne Loop + st %g4,[%o0-4] + +Lend: srl %g2,%o3,%g2 + st %g2,[%o0-0] + retl + ld [%sp+80],%o0 + diff --git a/comm/third_party/libgcrypt/mpi/sparc32/udiv.S b/comm/third_party/libgcrypt/mpi/sparc32/udiv.S new file mode 100644 index 0000000000..006b5c125c --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32/udiv.S @@ -0,0 +1,195 @@ +/* SPARC v7 __udiv_qrnnd division support, used from longlong.h. + * This is for v7 CPUs without a floating-point unit. + * + * Copyright (C) 1993, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +! INPUT PARAMETERS +! rem_ptr o0 +! n1 o1 +! n0 o2 +! d o3 +! +! Divides the two-word value n1:n0 by d with a shift-and-subtract loop: +! %g1 counts 8 passes, each pass performing 4 trial subtractions. The +! remainder is stored at [rem_ptr] and the quotient, the complement of the +! bits collected in %o2, is returned in %o0. The Lp*/Ln* label pairs are +! the same steps with the running remainder held in %o1 or %o4. +! A divisor with its top bit set takes the Largedivisor path, which divides +! n1n0 >> 1 by ceil(d / 2) and then corrects the result. +! NOTE(review): presumably requires n1 < d so the quotient fits one word -- +! confirm against longlong.h. + +#include "sysdep.h" + + .text + .align 4 + .global C_SYMBOL_NAME(__udiv_qrnnd) +C_SYMBOL_NAME(__udiv_qrnnd): + tst %o3 + bneg Largedivisor + mov 8,%g1 + + b Lp1 + addxcc %o2,%o2,%o2 + +Lplop: bcc Ln1 + addxcc %o2,%o2,%o2 +Lp1: addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc Ln2 + addxcc %o2,%o2,%o2 +Lp2: addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc Ln3 + addxcc %o2,%o2,%o2 +Lp3: addx %o1,%o1,%o1 + subcc %o1,%o3,%o4 + bcc Ln4 + addxcc %o2,%o2,%o2 +Lp4: addx %o1,%o1,%o1 + addcc %g1,-1,%g1 + bne Lplop + subcc %o1,%o3,%o4 + bcc Ln5 + addxcc %o2,%o2,%o2 +Lp5: st %o1,[%o0] + retl + xnor %g0,%o2,%o0 + +Lnlop: bcc Lp1 + addxcc %o2,%o2,%o2 +Ln1: addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc Lp2 + addxcc %o2,%o2,%o2 +Ln2: addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc Lp3 + addxcc %o2,%o2,%o2 +Ln3: addx %o4,%o4,%o4 + subcc %o4,%o3,%o1 + bcc Lp4 + addxcc %o2,%o2,%o2 +Ln4: addx %o4,%o4,%o4 + addcc %g1,-1,%g1 + bne Lnlop + subcc %o4,%o3,%o1 + bcc Lp5 + addxcc %o2,%o2,%o2 +Ln5: st %o4,[%o0] + retl + xnor %g0,%o2,%o0 + +Largedivisor: + and %o2,1,%o5 ! %o5 = n0 & 1 + + srl %o2,1,%o2 + sll %o1,31,%g2 + or %g2,%o2,%o2 ! 
%o2 = lo(n1n0 >> 1) + srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1) + + and %o3,1,%g2 + srl %o3,1,%g3 ! %g3 = floor(d / 2) + add %g3,%g2,%g3 ! %g3 = ceil(d / 2) + + b LLp1 + addxcc %o2,%o2,%o2 + +LLplop: bcc LLn1 + addxcc %o2,%o2,%o2 +LLp1: addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc LLn2 + addxcc %o2,%o2,%o2 +LLp2: addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc LLn3 + addxcc %o2,%o2,%o2 +LLp3: addx %o1,%o1,%o1 + subcc %o1,%g3,%o4 + bcc LLn4 + addxcc %o2,%o2,%o2 +LLp4: addx %o1,%o1,%o1 + addcc %g1,-1,%g1 + bne LLplop + subcc %o1,%g3,%o4 + bcc LLn5 + addxcc %o2,%o2,%o2 +LLp5: add %o1,%o1,%o1 ! << 1 + tst %g2 + bne Oddp + add %o5,%o1,%o1 + st %o1,[%o0] + retl + xnor %g0,%o2,%o0 + +LLnlop: bcc LLp1 + addxcc %o2,%o2,%o2 +LLn1: addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc LLp2 + addxcc %o2,%o2,%o2 +LLn2: addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc LLp3 + addxcc %o2,%o2,%o2 +LLn3: addx %o4,%o4,%o4 + subcc %o4,%g3,%o1 + bcc LLp4 + addxcc %o2,%o2,%o2 +LLn4: addx %o4,%o4,%o4 + addcc %g1,-1,%g1 + bne LLnlop + subcc %o4,%g3,%o1 + bcc LLp5 + addxcc %o2,%o2,%o2 +LLn5: add %o4,%o4,%o4 ! << 1 + tst %g2 + bne Oddn + add %o5,%o4,%o4 + st %o4,[%o0] + retl + xnor %g0,%o2,%o0 + +! Oddp/Oddn: reached only when d is odd (%g2 = d & 1); they adjust q' and r' +! because the loop divided by ceil(d / 2) instead of d / 2. +Oddp: xnor %g0,%o2,%o2 + ! q' in %o2. r' in %o1 + addcc %o1,%o2,%o1 + bcc LLp6 + addx %o2,0,%o2 + sub %o1,%o3,%o1 +LLp6: subcc %o1,%o3,%g0 + bcs LLp7 + subx %o2,-1,%o2 + sub %o1,%o3,%o1 +LLp7: st %o1,[%o0] + retl + mov %o2,%o0 + +Oddn: xnor %g0,%o2,%o2 + ! q' in %o2. 
r' in %o4 + addcc %o4,%o2,%o4 + bcc LLn6 + addx %o2,0,%o2 + sub %o4,%o3,%o4 +LLn6: subcc %o4,%o3,%g0 + bcs LLn7 + subx %o2,-1,%o2 + sub %o4,%o3,%o4 +LLn7: st %o4,[%o0] + retl + mov %o2,%o0 diff --git a/comm/third_party/libgcrypt/mpi/sparc32v8/distfiles b/comm/third_party/libgcrypt/mpi/sparc32v8/distfiles new file mode 100644 index 0000000000..2fcb0d1aa0 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32v8/distfiles @@ -0,0 +1,4 @@ +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S + diff --git a/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul1.S b/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul1.S new file mode 100644 index 0000000000..03fcddab0e --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul1.S @@ -0,0 +1,109 @@ +/* SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and + * store the product in a second limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 +! +! Stores the low word of s1_ptr[i] * s2_limb plus the running carry to +! res_ptr[i] for size limbs and returns the final carry limb in %o0. +! The loop is unrolled four times; (size mod 4) * 16 selects the entry +! point in the jump table at LL. LL10 is five instructions long, so the +! size mod 4 == 3 entry (LL+48) lands on its trailing nop and falls +! through into LL11. + +#include "sysdep.h" + +.text + .align 8 + .global C_SYMBOL_NAME(_gcry_mpih_mul_1) +C_SYMBOL_NAME(_gcry_mpih_mul_1): + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 +#if PIC + mov %o7,%g4 ! Save return address register + ! call leaves its own address (0b) in %o7, so LL must be taken + ! relative to 0b; relative to 1f the base would be LL-8. +0: call 1f + add %o7,LL-0b,%g3 +1: mov %g4,%o7 ! Restore return address register +#else + sethi %hi(LL),%g3 + or %g3,%lo(LL),%g3 +#endif + jmp %g3+%g1 + ld [%o1+0],%o4 ! 1 +LL: +LL00: add %o0,-4,%o0 + add %o1,-4,%o1 + b Loop00 /* 4, 8, 12, ... */ + orcc %g0,%g0,%g2 +LL01: b Loop01 /* 1, 5, 9, ... */ + orcc %g0,%g0,%g2 + nop + nop +LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ + add %o1,4,%o1 + b Loop10 + orcc %g0,%g0,%g2 + nop +LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ + add %o1,-8,%o1 + b Loop11 + orcc %g0,%g0,%g2 + +Loop: addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + st %g3,[%o0+0] ! 1 + rd %y,%g2 ! 1 +Loop00: umul %o4,%o3,%g3 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + st %g3,[%o0+4] ! 2 + rd %y,%g2 ! 2 +Loop11: umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + add %o1,16,%o1 + st %g3,[%o0+8] ! 3 + rd %y,%g2 ! 3 +Loop10: umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 +Loop01: addcc %o2,-4,%o2 + bg Loop + umul %o4,%o3,%g3 ! 1 + + addcc %g3,%g2,%g3 ! 4 + st %g3,[%o0+0] ! 4 + rd %y,%g2 ! 
4 + + retl + addx %g0,%g2,%o0 + + diff --git a/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul2.S b/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul2.S new file mode 100644 index 0000000000..6f5cc436a7 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul2.S @@ -0,0 +1,132 @@ +/* SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and + * add the result to a second limb vector. + * + * Copyright (C) 1992, 1993, 1994, 1995, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 +! +! Adds s1_ptr[i] * s2_limb to res_ptr[i] for size limbs, in place, and +! returns the final carry limb in %o0. The loop is unrolled four times; +! (size mod 4) * 16 selects the entry point in the jump table at LL. + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(_gcry_mpih_addmul_1) +C_SYMBOL_NAME(_gcry_mpih_addmul_1): + orcc %g0,%g0,%g2 + ld [%o1+0],%o4 ! 1 + + sll %o2,4,%g1 + and %g1,(4-1)<<4,%g1 +#if PIC + mov %o7,%g4 ! Save return address register + ! call leaves its own address (0b) in %o7, so LL must be taken + ! relative to 0b; relative to 1f the base would be LL-8. +0: call 1f + add %o7,LL-0b,%g3 +1: mov %g4,%o7 ! 
Restore return address register +#else + sethi %hi(LL),%g3 + or %g3,%lo(LL),%g3 +#endif + jmp %g3+%g1 + nop +LL: +LL00: add %o0,-4,%o0 + b Loop00 /* 4, 8, 12, ... */ + add %o1,-4,%o1 + nop +LL01: b Loop01 /* 1, 5, 9, ... */ + nop + nop + nop +LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */ + b Loop10 + add %o1,4,%o1 + nop +LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */ + b Loop11 + add %o1,-8,%o1 + nop + +1: addcc %g3,%g2,%g3 ! 1 + ld [%o1+4],%o4 ! 2 + rd %y,%g2 ! 1 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] ! 1 +Loop00: umul %o4,%o3,%g3 ! 2 + ld [%o0+4],%g1 ! 2 + addxcc %g3,%g2,%g3 ! 2 + ld [%o1+8],%o4 ! 3 + rd %y,%g2 ! 2 + addx %g0,%g2,%g2 + nop + addcc %g1,%g3,%g3 + st %g3,[%o0+4] ! 2 +Loop11: umul %o4,%o3,%g3 ! 3 + addxcc %g3,%g2,%g3 ! 3 + ld [%o1+12],%o4 ! 4 + rd %y,%g2 ! 3 + add %o1,16,%o1 + addx %g0,%g2,%g2 + ld [%o0+8],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+8] ! 3 +Loop10: umul %o4,%o3,%g3 ! 4 + addxcc %g3,%g2,%g3 ! 4 + ld [%o1+0],%o4 ! 1 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 + ld [%o0+12],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+12] ! 4 + add %o0,16,%o0 + addx %g0,%g2,%g2 +Loop01: addcc %o2,-4,%o2 + bg 1b + umul %o4,%o3,%g3 ! 1 + + addcc %g3,%g2,%g3 ! 4 + rd %y,%g2 ! 4 + addx %g0,%g2,%g2 + ld [%o0+0],%g1 ! 2 + addcc %g1,%g3,%g3 + st %g3,[%o0+0] ! 4 + addx %g0,%g2,%o0 + + retl + nop + + +! umul, ld, addxcc, rd, st + +! umul, ld, addxcc, rd, ld, addcc, st, addx + diff --git a/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul3.S b/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul3.S new file mode 100644 index 0000000000..93bb19433d --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/sparc32v8/mpih-mul3.S @@ -0,0 +1,67 @@ +/* SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and + * subtract the result from a second limb vector. + * + * Copyright (C) 1992, 1993, 1994, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +! INPUT PARAMETERS +! res_ptr o0 +! s1_ptr o1 +! size o2 +! s2_limb o3 +! +! Subtracts s1_ptr[i] * s2_limb from res_ptr[i] for size limbs, in place, +! and returns the final carry/borrow limb in %o0. Both pointers are biased +! by the byte size so that one negative index in %o2 counts up to zero. +! NOTE(review): the index is tested only after a limb has been processed, +! so size is presumably >= 1 -- confirm against callers. + +#include "sysdep.h" + +.text + .align 4 + .global C_SYMBOL_NAME(_gcry_mpih_submul_1) +C_SYMBOL_NAME(_gcry_mpih_submul_1): + sub %g0,%o2,%o2 ! negate ... + sll %o2,2,%o2 ! ... and scale size + sub %o1,%o2,%o1 ! o1 is offset s1_ptr + sub %o0,%o2,%g1 ! g1 is offset res_ptr + + mov 0,%o0 ! 
clear cy_limb + +! Per limb: the product low word plus the carry-in is subtracted from the +! result limb; the carry-out is the product high word (%y) plus the carry of +! that add plus the borrow of the subtract. +Loop: ld [%o1+%o2],%o4 + ld [%g1+%o2],%g2 + umul %o4,%o3,%o5 + rd %y,%g3 + addcc %o5,%o0,%o5 + addx %g3,0,%o0 + subcc %g2,%o5,%g2 + addx %o0,0,%o0 + st %g2,[%g1+%o2] + + addcc %o2,4,%o2 + bne Loop + nop + + retl + nop + + diff --git a/comm/third_party/libgcrypt/mpi/supersparc/distfiles b/comm/third_party/libgcrypt/mpi/supersparc/distfiles new file mode 100644 index 0000000000..550601cb58 --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/supersparc/distfiles @@ -0,0 +1,2 @@ +udiv.S + diff --git a/comm/third_party/libgcrypt/mpi/supersparc/udiv.S b/comm/third_party/libgcrypt/mpi/supersparc/udiv.S new file mode 100644 index 0000000000..79e506a11f --- /dev/null +++ b/comm/third_party/libgcrypt/mpi/supersparc/udiv.S @@ -0,0 +1,118 @@ +/* SuperSPARC __udiv_qrnnd division support, used from longlong.h. + * This is for SuperSPARC only, to compensate for its + * semi-functional udiv instruction. + * + * Copyright (C) 1993, 1994, 1996, 1998, + * 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + + +! INPUT PARAMETERS +! rem_ptr i0 +! n1 i1 +! n0 i2 +! d i3 +! +! Estimates the quotient of n1:n0 / d in double-precision floating point and +! then corrects it with an integer multiply and compare. +! Each operand is converted with fitod; LC0 (2^32) is added when the word is +! negative as a signed value, which makes the conversion unsigned. The +! dividend is formed as n1 * 2^32 + n0. An estimate of at least LC1 (2^31) +! is reduced by 2^31 before fdtoi and gets its top bit back through xor. +! The estimate is checked against d with umul and adjusted by -1, 0 or +1. +! The remainder is stored at [%i5] (a copy of rem_ptr) and the quotient is +! left in %i0 before restore. + +#include "sysdep.h" +#undef ret /* Kludge for glibc */ + + .text + .align 8 +LC0: .double 0r4294967296 +LC1: .double 0r2147483648 + + .align 4 + .global C_SYMBOL_NAME(__udiv_qrnnd) +C_SYMBOL_NAME(__udiv_qrnnd): + !#PROLOGUE# 0 + save %sp,-104,%sp + !#PROLOGUE# 1 + st %i1,[%fp-8] + ld [%fp-8],%f10 + sethi %hi(LC0),%o7 + fitod %f10,%f4 + ldd [%o7+%lo(LC0)],%f8 + cmp %i1,0 + bge L248 + mov %i0,%i5 + faddd %f4,%f8,%f4 +L248: + st %i2,[%fp-8] + ld [%fp-8],%f10 + fmuld %f4,%f8,%f6 + cmp %i2,0 + bge L249 + fitod %f10,%f2 + faddd %f2,%f8,%f2 +L249: + st %i3,[%fp-8] + faddd %f6,%f2,%f2 + ld [%fp-8],%f10 + cmp %i3,0 + bge L250 + fitod %f10,%f4 + faddd %f4,%f8,%f4 +L250: + fdivd %f2,%f4,%f2 + sethi %hi(LC1),%o7 + ldd [%o7+%lo(LC1)],%f4 + fcmped %f2,%f4 + nop + fbge,a L251 + fsubd %f2,%f4,%f2 + fdtoi %f2,%f2 + st %f2,[%fp-8] + b L252 + ld [%fp-8],%i4 +L251: + fdtoi %f2,%f2 + st %f2,[%fp-8] + ld [%fp-8],%i4 + sethi %hi(-2147483648),%g2 + xor %i4,%g2,%i4 +L252: + umul %i3,%i4,%g3 + rd %y,%i0 + subcc %i2,%g3,%o7 + subxcc %i1,%i0,%g0 + be L253 + cmp %o7,%i3 + + add %i4,-1,%i0 + add %o7,%i3,%o7 + st %o7,[%i5] + ret + restore +L253: + blu L246 + mov %i4,%i0 + add %i4,1,%i0 + sub %o7,%i3,%o7 +L246: + st %o7,[%i5] + ret + restore + |