diff options
Diffstat (limited to 'lib/mpi')
-rw-r--r-- | lib/mpi/Makefile | 30 | ||||
-rw-r--r-- | lib/mpi/ec.c | 1509 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-add1.c | 48 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-lshift.c | 50 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-mul1.c | 44 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-mul2.c | 47 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-mul3.c | 48 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-rshift.c | 50 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-sub1.c | 47 | ||||
-rw-r--r-- | lib/mpi/longlong.h | 1361 | ||||
-rw-r--r-- | lib/mpi/mpi-add.c | 155 | ||||
-rw-r--r-- | lib/mpi/mpi-bit.c | 307 | ||||
-rw-r--r-- | lib/mpi/mpi-cmp.c | 98 | ||||
-rw-r--r-- | lib/mpi/mpi-div.c | 234 | ||||
-rw-r--r-- | lib/mpi/mpi-inline.h | 109 | ||||
-rw-r--r-- | lib/mpi/mpi-internal.h | 232 | ||||
-rw-r--r-- | lib/mpi/mpi-inv.c | 143 | ||||
-rw-r--r-- | lib/mpi/mpi-mod.c | 157 | ||||
-rw-r--r-- | lib/mpi/mpi-mul.c | 91 | ||||
-rw-r--r-- | lib/mpi/mpi-pow.c | 314 | ||||
-rw-r--r-- | lib/mpi/mpi-sub-ui.c | 78 | ||||
-rw-r--r-- | lib/mpi/mpicoder.c | 752 | ||||
-rw-r--r-- | lib/mpi/mpih-cmp.c | 43 | ||||
-rw-r--r-- | lib/mpi/mpih-div.c | 517 | ||||
-rw-r--r-- | lib/mpi/mpih-mul.c | 509 | ||||
-rw-r--r-- | lib/mpi/mpiutil.c | 330 |
26 files changed, 7303 insertions, 0 deletions
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile new file mode 100644 index 000000000..6e6ef9a34 --- /dev/null +++ b/lib/mpi/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# MPI multiprecision maths library (from gpg) +# + +obj-$(CONFIG_MPILIB) = mpi.o + +mpi-y = \ + generic_mpih-lshift.o \ + generic_mpih-mul1.o \ + generic_mpih-mul2.o \ + generic_mpih-mul3.o \ + generic_mpih-rshift.o \ + generic_mpih-sub1.o \ + generic_mpih-add1.o \ + ec.o \ + mpicoder.o \ + mpi-add.o \ + mpi-bit.o \ + mpi-cmp.o \ + mpi-sub-ui.o \ + mpi-div.o \ + mpi-inv.o \ + mpi-mod.o \ + mpi-mul.o \ + mpih-cmp.o \ + mpih-div.o \ + mpih-mul.o \ + mpi-pow.o \ + mpiutil.o diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c new file mode 100644 index 000000000..c21470122 --- /dev/null +++ b/lib/mpi/ec.c @@ -0,0 +1,1509 @@ +/* ec.c - Elliptic Curve functions + * Copyright (C) 2007 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#define point_init(a) mpi_point_init((a)) +#define point_free(a) mpi_point_free_parts((a)) + +#define log_error(fmt, ...) pr_err(fmt, ##__VA_ARGS__) +#define log_fatal(fmt, ...) pr_err(fmt, ##__VA_ARGS__) + +#define DIM(v) (sizeof(v)/sizeof((v)[0])) + + +/* Create a new point option. 
NBITS gives the size in bits of one + * coordinate; it is only used to pre-allocate some resources and + * might also be passed as 0 to use a default value. + */ +MPI_POINT mpi_point_new(unsigned int nbits) +{ + MPI_POINT p; + + (void)nbits; /* Currently not used. */ + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p) + mpi_point_init(p); + return p; /* NULL if the kmalloc() failed */ +} +EXPORT_SYMBOL_GPL(mpi_point_new); + +/* Release the point object P. P may be NULL. */ +void mpi_point_release(MPI_POINT p) +{ + if (p) { + mpi_point_free_parts(p); + kfree(p); + } +} +EXPORT_SYMBOL_GPL(mpi_point_release); + +/* Initialize the fields of a point object. mpi_point_free_parts + * may be used to release the fields. The mpi_new() results are not checked here. + */ +void mpi_point_init(MPI_POINT p) +{ + p->x = mpi_new(0); + p->y = mpi_new(0); + p->z = mpi_new(0); +} +EXPORT_SYMBOL_GPL(mpi_point_init); + +/* Release the parts of a point object and reset the pointers to NULL. */ +void mpi_point_free_parts(MPI_POINT p) +{ + mpi_free(p->x); p->x = NULL; + mpi_free(p->y); p->y = NULL; + mpi_free(p->z); p->z = NULL; +} +EXPORT_SYMBOL_GPL(mpi_point_free_parts); + +/* Set the value from S into D. */ +static void point_set(MPI_POINT d, MPI_POINT s) +{ + mpi_set(d->x, s->x); + mpi_set(d->y, s->y); + mpi_set(d->z, s->z); +} + +static void point_resize(MPI_POINT p, struct mpi_ec_ctx *ctx) +{ /* Resize the coordinates of P to the limb count of CTX->p; Y is skipped for Montgomery curves. */ + size_t nlimbs = ctx->p->nlimbs; + + mpi_resize(p->x, nlimbs); + p->x->nlimbs = nlimbs; + mpi_resize(p->z, nlimbs); + p->z->nlimbs = nlimbs; + + if (ctx->model != MPI_EC_MONTGOMERY) { + mpi_resize(p->y, nlimbs); + p->y->nlimbs = nlimbs; + } +} + +static void point_swap_cond(MPI_POINT d, MPI_POINT s, unsigned long swap, + struct mpi_ec_ctx *ctx) +{ /* Pass each coordinate pair of D and S to mpi_swap_cond(); Y is skipped for Montgomery curves. */ + mpi_swap_cond(d->x, s->x, swap); + if (ctx->model != MPI_EC_MONTGOMERY) + mpi_swap_cond(d->y, s->y, swap); + mpi_swap_cond(d->z, s->z, swap); +} + + +/* W = W mod P.
*/ +static void ec_mod(MPI w, struct mpi_ec_ctx *ec) +{ + if (ec->t.p_barrett) + mpi_mod_barrett(w, w, ec->t.p_barrett); + else + mpi_mod(w, w, ec->p); +} + +static void ec_addm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_add(w, u, v); + ec_mod(w, ctx); +} + +static void ec_subm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec) +{ + mpi_sub(w, u, v); + while (w->sign) + mpi_add(w, w, ec->p); + /*ec_mod(w, ec);*/ +} + +static void ec_mulm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_mul(w, u, v); + ec_mod(w, ctx); +} + +/* W = 2 * U mod P. */ +static void ec_mul2(MPI w, MPI u, struct mpi_ec_ctx *ctx) +{ + mpi_lshift(w, u, 1); + ec_mod(w, ctx); +} + +static void ec_powm(MPI w, const MPI b, const MPI e, + struct mpi_ec_ctx *ctx) +{ + mpi_powm(w, b, e, ctx->p); + /* mpi_abs(w); */ +} + +/* Shortcut for + * ec_powm(B, B, mpi_const(MPI_C_TWO), ctx); + * for easier optimization. + */ +static void ec_pow2(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + /* Using mpi_mul is slightly faster (at least on amd64). */ + /* mpi_powm(w, b, mpi_const(MPI_C_TWO), ctx->p); */ + ec_mulm(w, b, b, ctx); +} + +/* Shortcut for + * ec_powm(B, B, mpi_const(MPI_C_THREE), ctx); + * for easier optimization. + */ +static void ec_pow3(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + mpi_powm(w, b, mpi_const(MPI_C_THREE), ctx->p); +} + +static void ec_invm(MPI x, MPI a, struct mpi_ec_ctx *ctx) +{ + if (!mpi_invm(x, a, ctx->p)) + log_error("ec_invm: inverse does not exist:\n"); +} + +static void mpih_set_cond(mpi_ptr_t wp, mpi_ptr_t up, + mpi_size_t usize, unsigned long set) +{ + mpi_size_t i; + mpi_limb_t mask = ((mpi_limb_t)0) - set; + mpi_limb_t x; + + for (i = 0; i < usize; i++) { + x = mask & (wp[i] ^ up[i]); + wp[i] = wp[i] ^ x; + } +} + +/* Routines for 2^255 - 19. 
*/ + +#define LIMB_SIZE_25519 ((256+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB) + +static void ec_addm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_25519; + mpi_limb_t n[LIMB_SIZE_25519]; + mpi_limb_t borrow; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("addm_25519: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + mpihelp_add_n(wp, up, vp, wsize); + borrow = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); +} + +static void ec_subm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_25519; + mpi_limb_t n[LIMB_SIZE_25519]; + mpi_limb_t borrow; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("subm_25519: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + borrow = mpihelp_sub_n(wp, up, vp, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); +} + +static void ec_mulm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_25519; + mpi_limb_t n[LIMB_SIZE_25519*2]; + mpi_limb_t m[LIMB_SIZE_25519+1]; + mpi_limb_t cy; + int msb; + + (void)ctx; + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("mulm_25519: different sizes\n"); + + up = u->d; + vp = v->d; + wp = w->d; + + mpihelp_mul_n(n, up, vp, wsize); + memcpy(wp, n, wsize * BYTES_PER_MPI_LIMB); + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); + + memcpy(m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB); + mpihelp_rshift(m, m, LIMB_SIZE_25519+1, (255 % 
BITS_PER_MPI_LIMB)); + + memcpy(n, m, wsize * BYTES_PER_MPI_LIMB); + cy = mpihelp_lshift(m, m, LIMB_SIZE_25519, 4); + m[LIMB_SIZE_25519] = cy; + cy = mpihelp_add_n(m, m, n, wsize); + m[LIMB_SIZE_25519] += cy; + cy = mpihelp_add_n(m, m, n, wsize); + m[LIMB_SIZE_25519] += cy; + cy = mpihelp_add_n(m, m, n, wsize); + m[LIMB_SIZE_25519] += cy; + + cy = mpihelp_add_n(wp, wp, m, wsize); + m[LIMB_SIZE_25519] += cy; + + memset(m, 0, wsize * BYTES_PER_MPI_LIMB); + msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB)); + m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19; + wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB)); + mpihelp_add_n(wp, wp, m, wsize); + + m[0] = 0; + cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); + mpih_set_cond(m, ctx->p->d, wsize, (cy != 0UL)); + mpihelp_add_n(wp, wp, m, wsize); +} + +static void ec_mul2_25519(MPI w, MPI u, struct mpi_ec_ctx *ctx) +{ + ec_addm_25519(w, u, u, ctx); +} + +static void ec_pow2_25519(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + ec_mulm_25519(w, b, b, ctx); +} + +/* Routines for 2^448 - 2^224 - 1. 
*/ + +#define LIMB_SIZE_448 ((448+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB) +#define LIMB_SIZE_HALF_448 ((LIMB_SIZE_448+1)/2) + +static void ec_addm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_448; + mpi_limb_t n[LIMB_SIZE_448]; + mpi_limb_t cy; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("addm_448: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + cy = mpihelp_add_n(wp, up, vp, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL)); + mpihelp_sub_n(wp, wp, n, wsize); +} + +static void ec_subm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_448; + mpi_limb_t n[LIMB_SIZE_448]; + mpi_limb_t borrow; + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("subm_448: different sizes\n"); + + memset(n, 0, sizeof(n)); + up = u->d; + vp = v->d; + wp = w->d; + + borrow = mpihelp_sub_n(wp, up, vp, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); +} + +static void ec_mulm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t wsize = LIMB_SIZE_448; + mpi_limb_t n[LIMB_SIZE_448*2]; + mpi_limb_t a2[LIMB_SIZE_HALF_448]; + mpi_limb_t a3[LIMB_SIZE_HALF_448]; + mpi_limb_t b0[LIMB_SIZE_HALF_448]; + mpi_limb_t b1[LIMB_SIZE_HALF_448]; + mpi_limb_t cy; + int i; +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + mpi_limb_t b1_rest, a3_rest; +#endif + + if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize) + log_bug("mulm_448: different sizes\n"); + + up = u->d; + vp = v->d; + wp = w->d; + + mpihelp_mul_n(n, up, vp, wsize); + + for (i = 0; i < (wsize + 1) / 2; i++) { + b0[i] = n[i]; + b1[i] = n[i+wsize/2]; + a2[i] = n[i+wsize]; + a3[i] = n[i+wsize+wsize/2]; + } + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + b0[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1; + 
a2[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1; + + b1_rest = 0; + a3_rest = 0; + + for (i = (wsize + 1) / 2 - 1; i >= 0; i--) { + mpi_limb_t b1v, a3v; + b1v = b1[i]; + a3v = a3[i]; + b1[i] = (b1_rest << 32) | (b1v >> 32); + a3[i] = (a3_rest << 32) | (a3v >> 32); + b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1); + a3_rest = a3v & (((mpi_limb_t)1UL << 32)-1); + } +#endif + + cy = mpihelp_add_n(b0, b0, a2, LIMB_SIZE_HALF_448); + cy += mpihelp_add_n(b0, b0, a3, LIMB_SIZE_HALF_448); + for (i = 0; i < (wsize + 1) / 2; i++) + wp[i] = b0[i]; +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + wp[LIMB_SIZE_HALF_448-1] &= (((mpi_limb_t)1UL << 32)-1); +#endif + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + cy = b0[LIMB_SIZE_HALF_448-1] >> 32; +#endif + + cy = mpihelp_add_1(b1, b1, LIMB_SIZE_HALF_448, cy); + cy += mpihelp_add_n(b1, b1, a2, LIMB_SIZE_HALF_448); + cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448); + cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448); +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + b1_rest = 0; + for (i = (wsize + 1) / 2 - 1; i >= 0; i--) { + mpi_limb_t b1v = b1[i]; + b1[i] = (b1_rest << 32) | (b1v >> 32); + b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1); + } + wp[LIMB_SIZE_HALF_448-1] |= (b1_rest << 32); +#endif + for (i = 0; i < wsize / 2; i++) + wp[i+(wsize + 1) / 2] = b1[i]; + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + cy = b1[LIMB_SIZE_HALF_448-1]; +#endif + + memset(n, 0, wsize * BYTES_PER_MPI_LIMB); + +#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2) + n[LIMB_SIZE_HALF_448-1] = cy << 32; +#else + n[LIMB_SIZE_HALF_448] = cy; +#endif + n[0] = cy; + mpihelp_add_n(wp, wp, n, wsize); + + memset(n, 0, wsize * BYTES_PER_MPI_LIMB); + cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize); + mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL)); + mpihelp_add_n(wp, wp, n, wsize); +} + +static void ec_mul2_448(MPI w, MPI u, struct mpi_ec_ctx *ctx) +{ + ec_addm_448(w, u, u, ctx); +} + +static void ec_pow2_448(MPI w, const MPI b, struct mpi_ec_ctx *ctx) +{ + ec_mulm_448(w, 
b, b, ctx); +} + +struct field_table { + const char *p; + + /* computation routines for the field. */ + void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); + void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); + void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); +}; + +static const struct field_table field_table[] = { + { + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + ec_addm_25519, + ec_subm_25519, + ec_mulm_25519, + ec_mul2_25519, + ec_pow2_25519 + }, + { + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + ec_addm_448, + ec_subm_448, + ec_mulm_448, + ec_mul2_448, + ec_pow2_448 + }, + { NULL, NULL, NULL, NULL, NULL, NULL }, +}; + +/* Force recomputation of all helper variables. */ +static void mpi_ec_get_reset(struct mpi_ec_ctx *ec) +{ + ec->t.valid.a_is_pminus3 = 0; + ec->t.valid.two_inv_p = 0; +} + +/* Accessor for helper variable. */ +static int ec_get_a_is_pminus3(struct mpi_ec_ctx *ec) +{ + MPI tmp; + + if (!ec->t.valid.a_is_pminus3) { + ec->t.valid.a_is_pminus3 = 1; + tmp = mpi_alloc_like(ec->p); + mpi_sub_ui(tmp, ec->p, 3); + ec->t.a_is_pminus3 = !mpi_cmp(ec->a, tmp); + mpi_free(tmp); + } + + return ec->t.a_is_pminus3; +} + +/* Accessor for helper variable. 
*/ +static MPI ec_get_two_inv_p(struct mpi_ec_ctx *ec) +{ + if (!ec->t.valid.two_inv_p) { + ec->t.valid.two_inv_p = 1; + if (!ec->t.two_inv_p) + ec->t.two_inv_p = mpi_alloc(0); + ec_invm(ec->t.two_inv_p, mpi_const(MPI_C_TWO), ec); + } + return ec->t.two_inv_p; +} + +static const char *const curve25519_bad_points[] = { + "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", + "0x0000000000000000000000000000000000000000000000000000000000000000", + "0x0000000000000000000000000000000000000000000000000000000000000001", + "0x00b8495f16056286fdb1329ceb8d09da6ac49ff1fae35616aeb8413b7c7aebe0", + "0x57119fd0dd4e22d8868e1c58c45c44045bef839c55b1d0b1248c50a3bc959c5f", + "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec", + "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee", + NULL +}; + +static const char *const curve448_bad_points[] = { + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "0x00000000000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000000000", + "0x00000000000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000000001", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffe", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "00000000000000000000000000000000000000000000000000000000", + NULL +}; + +static const char *const *bad_points_table[] = { + curve25519_bad_points, + curve448_bad_points, +}; + +static void mpi_ec_coefficient_normalize(MPI a, MPI p) +{ + if (a->sign) { + mpi_resize(a, p->nlimbs); + mpihelp_sub_n(a->d, p->d, a->d, p->nlimbs); + a->nlimbs = p->nlimbs; + a->sign = 0; + } +} + +/* This function initialized a context for elliptic curve based on the + * field GF(p). P is the prime specifying this field, A is the first + * coefficient. 
CTX is expected to be zeroized. + */ +void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, + enum ecc_dialects dialect, + int flags, MPI p, MPI a, MPI b) +{ + int i; + static int use_barrett = -1 /* TODO: 1 or -1 */; + + mpi_ec_coefficient_normalize(a, p); + mpi_ec_coefficient_normalize(b, p); + + /* Fixme: Do we want to check some constraints? e.g. a < p */ + + ctx->model = model; + ctx->dialect = dialect; + ctx->flags = flags; + if (dialect == ECC_DIALECT_ED25519) + ctx->nbits = 256; + else + ctx->nbits = mpi_get_nbits(p); + ctx->p = mpi_copy(p); + ctx->a = mpi_copy(a); + ctx->b = mpi_copy(b); + + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; + + mpi_ec_get_reset(ctx); + + if (model == MPI_EC_MONTGOMERY) { + for (i = 0; i < DIM(bad_points_table); i++) { + MPI p_candidate = mpi_scanval(bad_points_table[i][0]); + int match_p = !mpi_cmp(ctx->p, p_candidate); + int j; + + mpi_free(p_candidate); + if (!match_p) + continue; + + for (j = 0; i < DIM(ctx->t.scratch) && bad_points_table[i][j]; j++) + ctx->t.scratch[j] = mpi_scanval(bad_points_table[i][j]); + } + } else { + /* Allocate scratch variables. 
*/ + for (i = 0; i < DIM(ctx->t.scratch); i++) + ctx->t.scratch[i] = mpi_alloc_like(ctx->p); + } + + ctx->addm = ec_addm; + ctx->subm = ec_subm; + ctx->mulm = ec_mulm; + ctx->mul2 = ec_mul2; + ctx->pow2 = ec_pow2; + + for (i = 0; field_table[i].p; i++) { + MPI f_p; + + f_p = mpi_scanval(field_table[i].p); + if (!f_p) + break; + + if (!mpi_cmp(p, f_p)) { + ctx->addm = field_table[i].addm; + ctx->subm = field_table[i].subm; + ctx->mulm = field_table[i].mulm; + ctx->mul2 = field_table[i].mul2; + ctx->pow2 = field_table[i].pow2; + mpi_free(f_p); + + mpi_resize(ctx->a, ctx->p->nlimbs); + ctx->a->nlimbs = ctx->p->nlimbs; + + mpi_resize(ctx->b, ctx->p->nlimbs); + ctx->b->nlimbs = ctx->p->nlimbs; + + for (i = 0; i < DIM(ctx->t.scratch) && ctx->t.scratch[i]; i++) + ctx->t.scratch[i]->nlimbs = ctx->p->nlimbs; + + break; + } + + mpi_free(f_p); + } +} +EXPORT_SYMBOL_GPL(mpi_ec_init); + +void mpi_ec_deinit(struct mpi_ec_ctx *ctx) +{ + int i; + + mpi_barrett_free(ctx->t.p_barrett); + + /* Domain parameter. */ + mpi_free(ctx->p); + mpi_free(ctx->a); + mpi_free(ctx->b); + mpi_point_release(ctx->G); + mpi_free(ctx->n); + + /* The key. */ + mpi_point_release(ctx->Q); + mpi_free(ctx->d); + + /* Private data of ec.c. */ + mpi_free(ctx->t.two_inv_p); + + for (i = 0; i < DIM(ctx->t.scratch); i++) + mpi_free(ctx->t.scratch[i]); +} +EXPORT_SYMBOL_GPL(mpi_ec_deinit); + +/* Compute the affine coordinates from the projective coordinates in + * POINT. Set them into X and Y. If one coordinate is not required, + * X or Y may be passed as NULL. CTX is the usual context. Returns: 0 + * on success or !0 if POINT is at infinity. + */ +int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + if (!mpi_cmp_ui(point->z, 0)) + return -1; + + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: /* Using Jacobian coordinates. 
*/ + { + MPI z1, z2, z3; + + z1 = mpi_new(0); + z2 = mpi_new(0); + ec_invm(z1, point->z, ctx); /* z1 = z^(-1) mod p */ + ec_mulm(z2, z1, z1, ctx); /* z2 = z^(-2) mod p */ + + if (x) + ec_mulm(x, point->x, z2, ctx); + + if (y) { + z3 = mpi_new(0); + ec_mulm(z3, z2, z1, ctx); /* z3 = z^(-3) mod p */ + ec_mulm(y, point->y, z3, ctx); + mpi_free(z3); + } + + mpi_free(z2); + mpi_free(z1); + } + return 0; + + case MPI_EC_MONTGOMERY: + { + if (x) + mpi_set(x, point->x); + + if (y) { + log_fatal("%s: Getting Y-coordinate on %s is not supported\n", + "mpi_ec_get_affine", "Montgomery"); + return -1; + } + } + return 0; + + case MPI_EC_EDWARDS: + { + MPI z; + + z = mpi_new(0); + ec_invm(z, point->z, ctx); + + mpi_resize(z, ctx->p->nlimbs); + z->nlimbs = ctx->p->nlimbs; + + if (x) { + mpi_resize(x, ctx->p->nlimbs); + x->nlimbs = ctx->p->nlimbs; + ctx->mulm(x, point->x, z, ctx); + } + if (y) { + mpi_resize(y, ctx->p->nlimbs); + y->nlimbs = ctx->p->nlimbs; + ctx->mulm(y, point->y, z, ctx); + } + + mpi_free(z); + } + return 0; + + default: + return -1; + } +} +EXPORT_SYMBOL_GPL(mpi_ec_get_affine); + +/* RESULT = 2 * POINT (Weierstrass version). Uses CTX->t.scratch[0..5] as temporaries. */ +static void dup_point_weierstrass(MPI_POINT result, + MPI_POINT point, struct mpi_ec_ctx *ctx) +{ +#define x3 (result->x) +#define y3 (result->y) +#define z3 (result->z) +#define t1 (ctx->t.scratch[0]) +#define t2 (ctx->t.scratch[1]) +#define t3 (ctx->t.scratch[2]) /* not referenced in this function */ +#define l1 (ctx->t.scratch[3]) +#define l2 (ctx->t.scratch[4]) +#define l3 (ctx->t.scratch[5]) + + if (!mpi_cmp_ui(point->y, 0) || !mpi_cmp_ui(point->z, 0)) { + /* P_y == 0 || P_z == 0 => [1:1:0] */ + mpi_set_ui(x3, 1); + mpi_set_ui(y3, 1); + mpi_set_ui(z3, 0); + } else { + if (ec_get_a_is_pminus3(ctx)) { + /* Use the faster case. */ + /* L1 = 3(X - Z^2)(X + Z^2) */ + /* T1: used for Z^2. */ + /* T2: used for the right term.
*/ + ec_pow2(t1, point->z, ctx); + ec_subm(l1, point->x, t1, ctx); + ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx); + ec_addm(t2, point->x, t1, ctx); + ec_mulm(l1, l1, t2, ctx); + } else { + /* Standard case. */ + /* L1 = 3X^2 + aZ^4 */ + /* T1: used for aZ^4. */ + ec_pow2(l1, point->x, ctx); + ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx); + ec_powm(t1, point->z, mpi_const(MPI_C_FOUR), ctx); + ec_mulm(t1, t1, ctx->a, ctx); + ec_addm(l1, l1, t1, ctx); + } + /* Z3 = 2YZ */ + ec_mulm(z3, point->y, point->z, ctx); + ec_mul2(z3, z3, ctx); + + /* L2 = 4XY^2 */ + /* T2: used for Y^2; required later. */ + ec_pow2(t2, point->y, ctx); + ec_mulm(l2, t2, point->x, ctx); + ec_mulm(l2, l2, mpi_const(MPI_C_FOUR), ctx); + + /* X3 = L1^2 - 2L2 */ + /* T1: used for 2L2. */ + ec_pow2(x3, l1, ctx); + ec_mul2(t1, l2, ctx); + ec_subm(x3, x3, t1, ctx); + + /* L3 = 8Y^4 */ + /* T2: taken from above (Y^2), squared here to Y^4. */ + ec_pow2(t2, t2, ctx); + ec_mulm(l3, t2, mpi_const(MPI_C_EIGHT), ctx); + + /* Y3 = L1(L2 - X3) - L3 */ + ec_subm(y3, l2, x3, ctx); + ec_mulm(y3, y3, l1, ctx); + ec_subm(y3, y3, l3, ctx); + } + +#undef x3 +#undef y3 +#undef z3 +#undef t1 +#undef t2 +#undef t3 +#undef l1 +#undef l2 +#undef l3 +} + +/* RESULT = 2 * POINT (Montgomery version). Not implemented: only logs an error and leaves RESULT untouched. */ +static void dup_point_montgomery(MPI_POINT result, + MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + (void)result; + (void)point; + (void)ctx; + log_fatal("%s: %s not yet supported\n", + "mpi_ec_dup_point", "Montgomery"); +} + +/* RESULT = 2 * POINT (Twisted Edwards version).
*/ +static void dup_point_edwards(MPI_POINT result, + MPI_POINT point, struct mpi_ec_ctx *ctx) +{ +#define X1 (point->x) +#define Y1 (point->y) +#define Z1 (point->z) +#define X3 (result->x) +#define Y3 (result->y) +#define Z3 (result->z) +#define B (ctx->t.scratch[0]) +#define C (ctx->t.scratch[1]) +#define D (ctx->t.scratch[2]) +#define E (ctx->t.scratch[3]) +#define F (ctx->t.scratch[4]) +#define H (ctx->t.scratch[5]) +#define J (ctx->t.scratch[6]) + + /* Compute: (X_3 : Y_3 : Z_3) = 2( X_1 : Y_1 : Z_1 ) */ + + /* B = (X_1 + Y_1)^2 */ + ctx->addm(B, X1, Y1, ctx); + ctx->pow2(B, B, ctx); + + /* C = X_1^2 */ + /* D = Y_1^2 */ + ctx->pow2(C, X1, ctx); + ctx->pow2(D, Y1, ctx); + + /* E = aC */ + if (ctx->dialect == ECC_DIALECT_ED25519) + ctx->subm(E, ctx->p, C, ctx); + else + ctx->mulm(E, ctx->a, C, ctx); + + /* F = E + D */ + ctx->addm(F, E, D, ctx); + + /* H = Z_1^2 */ + ctx->pow2(H, Z1, ctx); + + /* J = F - 2H */ + ctx->mul2(J, H, ctx); + ctx->subm(J, F, J, ctx); + + /* X_3 = (B - C - D) · J */ + ctx->subm(X3, B, C, ctx); + ctx->subm(X3, X3, D, ctx); + ctx->mulm(X3, X3, J, ctx); + + /* Y_3 = F · (E - D) */ + ctx->subm(Y3, E, D, ctx); + ctx->mulm(Y3, Y3, F, ctx); + + /* Z_3 = F · J */ + ctx->mulm(Z3, F, J, ctx); + +#undef X1 +#undef Y1 +#undef Z1 +#undef X3 +#undef Y3 +#undef Z3 +#undef B +#undef C +#undef D +#undef E +#undef F +#undef H +#undef J +} + +/* RESULT = 2 * POINT */ +static void +mpi_ec_dup_point(MPI_POINT result, MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: + dup_point_weierstrass(result, point, ctx); + break; + case MPI_EC_MONTGOMERY: + dup_point_montgomery(result, point, ctx); + break; + case MPI_EC_EDWARDS: + dup_point_edwards(result, point, ctx); + break; + } +} + +/* RESULT = P1 + P2 (Weierstrass version).*/ +static void add_points_weierstrass(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ +#define x1 (p1->x) +#define y1 (p1->y) +#define z1 (p1->z) +#define x2 (p2->x) 
+#define y2 (p2->y) +#define z2 (p2->z) +#define x3 (result->x) +#define y3 (result->y) +#define z3 (result->z) +#define l1 (ctx->t.scratch[0]) +#define l2 (ctx->t.scratch[1]) +#define l3 (ctx->t.scratch[2]) +#define l4 (ctx->t.scratch[3]) +#define l5 (ctx->t.scratch[4]) +#define l6 (ctx->t.scratch[5]) +#define l7 (ctx->t.scratch[6]) +#define l8 (ctx->t.scratch[7]) +#define l9 (ctx->t.scratch[8]) +#define t1 (ctx->t.scratch[9]) +#define t2 (ctx->t.scratch[10]) + + if ((!mpi_cmp(x1, x2)) && (!mpi_cmp(y1, y2)) && (!mpi_cmp(z1, z2))) { + /* Same point; need to call the duplicate function. */ + mpi_ec_dup_point(result, p1, ctx); + } else if (!mpi_cmp_ui(z1, 0)) { + /* P1 is at infinity. */ + mpi_set(x3, p2->x); + mpi_set(y3, p2->y); + mpi_set(z3, p2->z); + } else if (!mpi_cmp_ui(z2, 0)) { + /* P2 is at infinity. */ + mpi_set(x3, p1->x); + mpi_set(y3, p1->y); + mpi_set(z3, p1->z); + } else { + int z1_is_one = !mpi_cmp_ui(z1, 1); + int z2_is_one = !mpi_cmp_ui(z2, 1); + + /* l1 = x1 z2^2 */ + /* l2 = x2 z1^2 */ + if (z2_is_one) + mpi_set(l1, x1); + else { + ec_pow2(l1, z2, ctx); + ec_mulm(l1, l1, x1, ctx); + } + if (z1_is_one) + mpi_set(l2, x2); + else { + ec_pow2(l2, z1, ctx); + ec_mulm(l2, l2, x2, ctx); + } + /* l3 = l1 - l2 */ + ec_subm(l3, l1, l2, ctx); + /* l4 = y1 z2^3 */ + ec_powm(l4, z2, mpi_const(MPI_C_THREE), ctx); + ec_mulm(l4, l4, y1, ctx); + /* l5 = y2 z1^3 */ + ec_powm(l5, z1, mpi_const(MPI_C_THREE), ctx); + ec_mulm(l5, l5, y2, ctx); + /* l6 = l4 - l5 */ + ec_subm(l6, l4, l5, ctx); + + if (!mpi_cmp_ui(l3, 0)) { + if (!mpi_cmp_ui(l6, 0)) { + /* P1 and P2 are the same - use duplicate function. */ + mpi_ec_dup_point(result, p1, ctx); + } else { + /* P1 is the inverse of P2. 
*/ + mpi_set_ui(x3, 1); + mpi_set_ui(y3, 1); + mpi_set_ui(z3, 0); + } + } else { + /* l7 = l1 + l2 */ + ec_addm(l7, l1, l2, ctx); + /* l8 = l4 + l5 */ + ec_addm(l8, l4, l5, ctx); + /* z3 = z1 z2 l3 */ + ec_mulm(z3, z1, z2, ctx); + ec_mulm(z3, z3, l3, ctx); + /* x3 = l6^2 - l7 l3^2 */ + ec_pow2(t1, l6, ctx); + ec_pow2(t2, l3, ctx); + ec_mulm(t2, t2, l7, ctx); + ec_subm(x3, t1, t2, ctx); + /* l9 = l7 l3^2 - 2 x3 */ + ec_mul2(t1, x3, ctx); + ec_subm(l9, t2, t1, ctx); + /* y3 = (l9 l6 - l8 l3^3)/2 */ + ec_mulm(l9, l9, l6, ctx); + ec_powm(t1, l3, mpi_const(MPI_C_THREE), ctx); /* fixme: Use saved value*/ + ec_mulm(t1, t1, l8, ctx); + ec_subm(y3, l9, t1, ctx); + ec_mulm(y3, y3, ec_get_two_inv_p(ctx), ctx); + } + } + +#undef x1 +#undef y1 +#undef z1 +#undef x2 +#undef y2 +#undef z2 +#undef x3 +#undef y3 +#undef z3 +#undef l1 +#undef l2 +#undef l3 +#undef l4 +#undef l5 +#undef l6 +#undef l7 +#undef l8 +#undef l9 +#undef t1 +#undef t2 +} + +/* RESULT = P1 + P2 (Montgomery version). Not implemented: only logs an error and leaves RESULT untouched. */ +static void add_points_montgomery(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ + (void)result; + (void)p1; + (void)p2; + (void)ctx; + log_fatal("%s: %s not yet supported\n", + "mpi_ec_add_points", "Montgomery"); +} + +/* RESULT = P1 + P2 (Twisted Edwards version). Uses CTX->t.scratch[0..7] as temporaries. */ +static void add_points_edwards(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ +#define X1 (p1->x) +#define Y1 (p1->y) +#define Z1 (p1->z) +#define X2 (p2->x) +#define Y2 (p2->y) +#define Z2 (p2->z) +#define X3 (result->x) +#define Y3 (result->y) +#define Z3 (result->z) +#define A (ctx->t.scratch[0]) +#define B (ctx->t.scratch[1]) +#define C (ctx->t.scratch[2]) +#define D (ctx->t.scratch[3]) +#define E (ctx->t.scratch[4]) +#define F (ctx->t.scratch[5]) +#define G (ctx->t.scratch[6]) +#define tmp (ctx->t.scratch[7]) + + point_resize(result, ctx); + + /* Compute: (X_3 : Y_3 : Z_3) = (X_1 : Y_1 : Z_1) + (X_2 : Y_2 : Z_2) */ + + /* A = Z1 · Z2 */ + ctx->mulm(A, Z1, Z2, ctx);
+ + /* B = A^2 */ + ctx->pow2(B, A, ctx); + + /* C = X1 · X2 */ + ctx->mulm(C, X1, X2, ctx); + + /* D = Y1 · Y2 */ + ctx->mulm(D, Y1, Y2, ctx); + + /* E = d · C · D */ + ctx->mulm(E, ctx->b, C, ctx); /* CTX->b is used as the constant d */ + ctx->mulm(E, E, D, ctx); + + /* F = B - E */ + ctx->subm(F, B, E, ctx); + + /* G = B + E */ + ctx->addm(G, B, E, ctx); + + /* X_3 = A · F · ((X_1 + Y_1) · (X_2 + Y_2) - C - D) */ + ctx->addm(tmp, X1, Y1, ctx); + ctx->addm(X3, X2, Y2, ctx); + ctx->mulm(X3, X3, tmp, ctx); + ctx->subm(X3, X3, C, ctx); + ctx->subm(X3, X3, D, ctx); + ctx->mulm(X3, X3, F, ctx); + ctx->mulm(X3, X3, A, ctx); + + /* Y_3 = A · G · (D - aC) */ + if (ctx->dialect == ECC_DIALECT_ED25519) { + ctx->addm(Y3, D, C, ctx); /* shortcut taken for this dialect: D + C stands in for D - aC, i.e. a = -1 */ + } else { + ctx->mulm(Y3, ctx->a, C, ctx); + ctx->subm(Y3, D, Y3, ctx); + } + ctx->mulm(Y3, Y3, G, ctx); + ctx->mulm(Y3, Y3, A, ctx); + + /* Z_3 = F · G */ + ctx->mulm(Z3, F, G, ctx); + + +#undef X1 +#undef Y1 +#undef Z1 +#undef X2 +#undef Y2 +#undef Z2 +#undef X3 +#undef Y3 +#undef Z3 +#undef A +#undef B +#undef C +#undef D +#undef E +#undef F +#undef G +#undef tmp +} + +/* Compute a step of Montgomery Ladder (only use X and Z in the point). + * Inputs: P1, P2, and x-coordinate of DIF = P1 - P2. + * Outputs: PRD = 2 * P1 and SUM = P1 + P2.
+ */ +static void montgomery_ladder(MPI_POINT prd, MPI_POINT sum, + MPI_POINT p1, MPI_POINT p2, MPI dif_x, + struct mpi_ec_ctx *ctx) +{ /* NOTE: the X and Z coordinates of P1 and P2 are overwritten as working storage. */ + ctx->addm(sum->x, p2->x, p2->z, ctx); + ctx->subm(p2->z, p2->x, p2->z, ctx); + ctx->addm(prd->x, p1->x, p1->z, ctx); + ctx->subm(p1->z, p1->x, p1->z, ctx); + ctx->mulm(p2->x, p1->z, sum->x, ctx); + ctx->mulm(p2->z, prd->x, p2->z, ctx); + ctx->pow2(p1->x, prd->x, ctx); + ctx->pow2(p1->z, p1->z, ctx); + ctx->addm(sum->x, p2->x, p2->z, ctx); + ctx->subm(p2->z, p2->x, p2->z, ctx); + ctx->mulm(prd->x, p1->x, p1->z, ctx); + ctx->subm(p1->z, p1->x, p1->z, ctx); + ctx->pow2(sum->x, sum->x, ctx); + ctx->pow2(sum->z, p2->z, ctx); + ctx->mulm(prd->z, p1->z, ctx->a, ctx); /* CTX->A: (a-2)/4 */ + ctx->mulm(sum->z, sum->z, dif_x, ctx); + ctx->addm(prd->z, p1->x, prd->z, ctx); + ctx->mulm(prd->z, prd->z, p1->z, ctx); +} + +/* RESULT = P1 + P2, dispatched on CTX->model; any other model value does nothing. */ +void mpi_ec_add_points(MPI_POINT result, + MPI_POINT p1, MPI_POINT p2, + struct mpi_ec_ctx *ctx) +{ + switch (ctx->model) { + case MPI_EC_WEIERSTRASS: + add_points_weierstrass(result, p1, p2, ctx); + break; + case MPI_EC_MONTGOMERY: + add_points_montgomery(result, p1, p2, ctx); + break; + case MPI_EC_EDWARDS: + add_points_edwards(result, p1, p2, ctx); + break; + } +} +EXPORT_SYMBOL_GPL(mpi_ec_add_points); + +/* Scalar point multiplication - the main function for ECC. It takes + * an integer SCALAR and a POINT as well as the usual context CTX. + * RESULT will be set to the resulting point. + */ +void mpi_ec_mul_point(MPI_POINT result, + MPI scalar, MPI_POINT point, + struct mpi_ec_ctx *ctx) +{ + MPI x1, y1, z1, k, h, yy; + unsigned int i, loops; + struct gcry_mpi_point p1, p2, p1inv; + + if (ctx->model == MPI_EC_EDWARDS) { + /* Simple left to right binary method. Algorithm 3.27 from + * {author={Hankerson, Darrel and Menezes, Alfred J.
and Vanstone, Scott}, + * title = {Guide to Elliptic Curve Cryptography}, + * year = {2003}, isbn = {038795273X}, + * url = {http://www.cacr.math.uwaterloo.ca/ecc/}, + * publisher = {Springer-Verlag New York, Inc.}} + */ + unsigned int nbits; + int j; + + if (mpi_cmp(scalar, ctx->p) >= 0) + nbits = mpi_get_nbits(scalar); + else + nbits = mpi_get_nbits(ctx->p); + + mpi_set_ui(result->x, 0); + mpi_set_ui(result->y, 1); + mpi_set_ui(result->z, 1); + point_resize(point, ctx); + + point_resize(result, ctx); + point_resize(point, ctx); + + for (j = nbits-1; j >= 0; j--) { + mpi_ec_dup_point(result, result, ctx); + if (mpi_test_bit(scalar, j)) + mpi_ec_add_points(result, result, point, ctx); + } + return; + } else if (ctx->model == MPI_EC_MONTGOMERY) { + unsigned int nbits; + int j; + struct gcry_mpi_point p1_, p2_; + MPI_POINT q1, q2, prd, sum; + unsigned long sw; + mpi_size_t rsize; + int scalar_copied = 0; + + /* Compute scalar point multiplication with Montgomery Ladder. + * Note that we don't use Y-coordinate in the points at all. + * RESULT->Y will be filled by zero. 
+ */ + + nbits = mpi_get_nbits(scalar); + point_init(&p1); + point_init(&p2); + point_init(&p1_); + point_init(&p2_); + mpi_set_ui(p1.x, 1); + mpi_free(p2.x); + p2.x = mpi_copy(point->x); + mpi_set_ui(p2.z, 1); + + point_resize(&p1, ctx); + point_resize(&p2, ctx); + point_resize(&p1_, ctx); + point_resize(&p2_, ctx); + + mpi_resize(point->x, ctx->p->nlimbs); + point->x->nlimbs = ctx->p->nlimbs; + + q1 = &p1; + q2 = &p2; + prd = &p1_; + sum = &p2_; + + for (j = nbits-1; j >= 0; j--) { + MPI_POINT t; + + sw = mpi_test_bit(scalar, j); + point_swap_cond(q1, q2, sw, ctx); + montgomery_ladder(prd, sum, q1, q2, point->x, ctx); + point_swap_cond(prd, sum, sw, ctx); + t = q1; q1 = prd; prd = t; + t = q2; q2 = sum; sum = t; + } + + mpi_clear(result->y); + sw = (nbits & 1); + point_swap_cond(&p1, &p1_, sw, ctx); + + rsize = p1.z->nlimbs; + MPN_NORMALIZE(p1.z->d, rsize); + if (rsize == 0) { + mpi_set_ui(result->x, 1); + mpi_set_ui(result->z, 0); + } else { + z1 = mpi_new(0); + ec_invm(z1, p1.z, ctx); + ec_mulm(result->x, p1.x, z1, ctx); + mpi_set_ui(result->z, 1); + mpi_free(z1); + } + + point_free(&p1); + point_free(&p2); + point_free(&p1_); + point_free(&p2_); + if (scalar_copied) + mpi_free(scalar); + return; + } + + x1 = mpi_alloc_like(ctx->p); + y1 = mpi_alloc_like(ctx->p); + h = mpi_alloc_like(ctx->p); + k = mpi_copy(scalar); + yy = mpi_copy(point->y); + + if (mpi_has_sign(k)) { + k->sign = 0; + ec_invm(yy, yy, ctx); + } + + if (!mpi_cmp_ui(point->z, 1)) { + mpi_set(x1, point->x); + mpi_set(y1, yy); + } else { + MPI z2, z3; + + z2 = mpi_alloc_like(ctx->p); + z3 = mpi_alloc_like(ctx->p); + ec_mulm(z2, point->z, point->z, ctx); + ec_mulm(z3, point->z, z2, ctx); + ec_invm(z2, z2, ctx); + ec_mulm(x1, point->x, z2, ctx); + ec_invm(z3, z3, ctx); + ec_mulm(y1, yy, z3, ctx); + mpi_free(z2); + mpi_free(z3); + } + z1 = mpi_copy(mpi_const(MPI_C_ONE)); + + mpi_mul(h, k, mpi_const(MPI_C_THREE)); /* h = 3k */ + loops = mpi_get_nbits(h); + if (loops < 2) { + /* If SCALAR is zero, the 
above mpi_mul sets H to zero and thus + * LOOPs will be zero. To avoid an underflow of I in the main + * loop we set LOOP to 2 and the result to (0,0,0). + */ + loops = 2; + mpi_clear(result->x); + mpi_clear(result->y); + mpi_clear(result->z); + } else { + mpi_set(result->x, point->x); + mpi_set(result->y, yy); + mpi_set(result->z, point->z); + } + mpi_free(yy); yy = NULL; + + p1.x = x1; x1 = NULL; + p1.y = y1; y1 = NULL; + p1.z = z1; z1 = NULL; + point_init(&p2); + point_init(&p1inv); + + /* Invert point: y = p - y mod p */ + point_set(&p1inv, &p1); + ec_subm(p1inv.y, ctx->p, p1inv.y, ctx); + + for (i = loops-2; i > 0; i--) { + mpi_ec_dup_point(result, result, ctx); + if (mpi_test_bit(h, i) == 1 && mpi_test_bit(k, i) == 0) { + point_set(&p2, result); + mpi_ec_add_points(result, &p2, &p1, ctx); + } + if (mpi_test_bit(h, i) == 0 && mpi_test_bit(k, i) == 1) { + point_set(&p2, result); + mpi_ec_add_points(result, &p2, &p1inv, ctx); + } + } + + point_free(&p1); + point_free(&p2); + point_free(&p1inv); + mpi_free(h); + mpi_free(k); +} +EXPORT_SYMBOL_GPL(mpi_ec_mul_point); + +/* Return true if POINT is on the curve described by CTX. */ +int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx) +{ + int res = 0; + MPI x, y, w; + + x = mpi_new(0); + y = mpi_new(0); + w = mpi_new(0); + + /* Check that the point is in range. This needs to be done here and + * not after conversion to affine coordinates. 
+	 */
+	if (mpi_cmpabs(point->x, ctx->p) >= 0 ||
+	    mpi_cmpabs(point->y, ctx->p) >= 0 ||
+	    mpi_cmpabs(point->z, ctx->p) >= 0)
+		goto leave;
+
+	switch (ctx->model) {
+	case MPI_EC_WEIERSTRASS:
+		{
+			MPI xxx;
+
+			if (mpi_ec_get_affine(x, y, point, ctx))
+				goto leave;
+
+			xxx = mpi_new(0);
+
+			/* y^2 == x^3 + a·x + b */
+			ec_pow2(y, y, ctx);
+
+			/* w = a·x + b + x^3 */
+			ec_pow3(xxx, x, ctx);
+			ec_mulm(w, ctx->a, x, ctx);
+			ec_addm(w, w, ctx->b, ctx);
+			ec_addm(w, w, xxx, ctx);
+
+			res = !mpi_cmp(y, w);
+
+			mpi_free(xxx);
+		}
+		break;
+
+	case MPI_EC_MONTGOMERY:
+		{
+			/* Y is unused as a coordinate on this path, so it is
+			 * borrowed as scratch space under two local names.
+			 */
+#define xx y
+			/* With Montgomery curve, only X-coordinate is valid. */
+			if (mpi_ec_get_affine(x, NULL, point, ctx))
+				goto leave;
+
+			/* The equation is: b * y^2 == x^3 + a · x^2 + x */
+			/* We check if right hand is quadratic residue or not by
+			 * Euler's criterion.
+			 */
+			/* CTX->A has (a-2)/4 and CTX->B has b^-1 */
+			ec_mulm(w, ctx->a, mpi_const(MPI_C_FOUR), ctx);
+			ec_addm(w, w, mpi_const(MPI_C_TWO), ctx);
+			ec_mulm(w, w, x, ctx);
+			ec_pow2(xx, x, ctx);
+			ec_addm(w, w, xx, ctx);
+			ec_addm(w, w, mpi_const(MPI_C_ONE), ctx);
+			ec_mulm(w, w, x, ctx);
+			ec_mulm(w, w, ctx->b, ctx);
+#undef xx
+			/* Compute Euler's criterion: w^(p-1)/2 */
+#define p_minus1 y
+			ec_subm(p_minus1, ctx->p, mpi_const(MPI_C_ONE), ctx);
+			mpi_rshift(p_minus1, p_minus1, 1);
+			ec_powm(w, w, p_minus1, ctx);
+
+			res = !mpi_cmp_ui(w, 1);
+#undef p_minus1
+		}
+		break;
+
+	case MPI_EC_EDWARDS:
+		{
+			if (mpi_ec_get_affine(x, y, point, ctx))
+				goto leave;
+
+			mpi_resize(w, ctx->p->nlimbs);
+			w->nlimbs = ctx->p->nlimbs;
+
+			/* a · x^2 + y^2 - 1 - b · x^2 · y^2 == 0 */
+			ctx->pow2(x, x, ctx);
+			ctx->pow2(y, y, ctx);
+			/* The Ed25519 dialect uses p - x^2 in place of a · x^2. */
+			if (ctx->dialect == ECC_DIALECT_ED25519)
+				ctx->subm(w, ctx->p, x, ctx);
+			else
+				ctx->mulm(w, ctx->a, x, ctx);
+			ctx->addm(w, w, y, ctx);
+			ctx->mulm(x, x, y, ctx);
+			ctx->mulm(x, x, ctx->b, ctx);
+			ctx->subm(w, w, x, ctx);
+			/* On the curve iff what is left equals 1. */
+			res = !mpi_cmp_ui(w, 1);
+		}
+		break;
+	}
+
+leave:
+	mpi_free(w);
+	mpi_free(x);
+	mpi_free(y);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(mpi_ec_curve_point);
diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c
new file mode 100644
index 000000000..299308b54
--- /dev/null
+++ b/lib/mpi/generic_mpih-add1.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpihelp-add_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Add the SIZE-limb numbers at S1_PTR and S2_PTR limb by limb, store the
+ * SIZE low limbs of the sum at RES_PTR and return the carry out of the
+ * most significant limb.  The loop body runs before its exit test, so
+ * SIZE has to be at least 1.
+ */
+mpi_limb_t
+mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+	mpi_limb_t carry = 0;
+	/* Index runs from -SIZE up to -1 so the loop can stop on zero. */
+	mpi_size_t idx = -size;
+
+	/* Bias the pointers so that the negative index lands on limb 0. */
+	s1_ptr -= idx;
+	s2_ptr -= idx;
+	res_ptr -= idx;
+
+	do {
+		mpi_limb_t a = s1_ptr[idx];
+		mpi_limb_t b = s2_ptr[idx] + carry;	/* fold in old carry */
+
+		carry = b < carry;	/* did folding the carry wrap? */
+		b += a;
+		carry += b < a;		/* did the main addition wrap? */
+		res_ptr[idx] = b;
+	} while (++idx);
+
+	return carry;
+}
diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c
new file mode 100644
index 000000000..7b21f5938
--- /dev/null
+++ b/lib/mpi/generic_mpih-lshift.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpihelp-lshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ * and store the USIZE least significant digits of the result at WP.
+ * Return the bits shifted out from the most significant digit.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MPI_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ * 3. USIZE must be at least 1; the top limb is read unconditionally.
+ */
+
+mpi_limb_t
+mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt)
+{
+	const unsigned int lbits = cnt;
+	const unsigned int rbits = BITS_PER_MPI_LIMB - cnt;
+	mpi_limb_t upper = up[usize - 1];
+	mpi_limb_t lower;
+	mpi_limb_t shifted_out = upper >> rbits;
+	mpi_size_t i;
+
+	/* Walk from the most significant limb downwards so that an in-place
+	 * shift with WP >= UP never overwrites a limb before it is read.
+	 */
+	for (i = usize - 1; i > 0; i--) {
+		lower = up[i - 1];
+		wp[i] = (upper << lbits) | (lower >> rbits);
+		upper = lower;
+	}
+	wp[0] = upper << lbits;
+
+	return shifted_out;
+}
diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c
new file mode 100644
index 000000000..e020e61d4
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul1.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpihelp-mul_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Multiply the S1_SIZE-limb number at S1_PTR by the single limb S2_LIMB,
+ * store the S1_SIZE low limbs of the product at RES_PTR and return the
+ * limb carried out of the top.  The loop body runs before its exit test,
+ * so S1_SIZE has to be at least 1.
+ */
+mpi_limb_t
+mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	      mpi_limb_t s2_limb)
+{
+	mpi_limb_t carry = 0;
+	mpi_limb_t hi, lo;
+	/* Index runs from -S1_SIZE up to -1 so the loop can stop on zero. */
+	mpi_size_t idx = -s1_size;
+
+	/* Bias the pointers so that the negative index lands on limb 0. */
+	s1_ptr -= idx;
+	res_ptr -= idx;
+
+	do {
+		umul_ppmm(hi, lo, s1_ptr[idx], s2_limb);
+		lo += carry;
+		/* New carry: high product limb plus the wrap of the add. */
+		carry = (lo < carry ? 1 : 0) + hi;
+		res_ptr[idx] = lo;
+	} while (++idx);
+
+	return carry;
+}
diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c
new file mode 100644
index 000000000..9484d8528
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul2.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpihelp-mul_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Multiply the S1_SIZE-limb number at S1_PTR by S2_LIMB and add the
+ * product to the S1_SIZE limbs at RES_PTR in place.  Returns the limb
+ * carried out of the top.  S1_SIZE has to be at least 1.
+ */
+mpi_limb_t
+mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+		 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t carry = 0;
+	/* Index runs from -S1_SIZE up to -1 so the loop can stop on zero. */
+	mpi_size_t idx = -s1_size;
+
+	res_ptr -= idx;
+	s1_ptr -= idx;
+
+	do {
+		mpi_limb_t hi, lo, acc;
+
+		umul_ppmm(hi, lo, s1_ptr[idx], s2_limb);
+
+		/* Fold the incoming carry into the low product limb. */
+		lo += carry;
+		carry = (lo < carry ? 1 : 0) + hi;
+
+		/* Accumulate into the destination limb. */
+		acc = res_ptr[idx];
+		lo = acc + lo;
+		carry += lo < acc ? 1 : 0;
+		res_ptr[idx] = lo;
+	} while (++idx);
+	return carry;
+}
diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c
new file mode 100644
index 000000000..ccdbab412
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul3.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpihelp-mul_3.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Multiply the S1_SIZE-limb number at S1_PTR by S2_LIMB and subtract the
+ * product from the S1_SIZE limbs at RES_PTR in place.  Returns the limb
+ * borrowed out of the top.  S1_SIZE has to be at least 1.
+ */
+mpi_limb_t
+mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+		 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t borrow = 0;
+	/* Index runs from -S1_SIZE up to -1 so the loop can stop on zero. */
+	mpi_size_t idx = -s1_size;
+
+	res_ptr -= idx;
+	s1_ptr -= idx;
+
+	do {
+		mpi_limb_t hi, lo, acc;
+
+		umul_ppmm(hi, lo, s1_ptr[idx], s2_limb);
+
+		/* Fold the incoming borrow into the low product limb. */
+		lo += borrow;
+		borrow = (lo < borrow ? 1 : 0) + hi;
+
+		/* Subtract from the destination limb. */
+		acc = res_ptr[idx];
+		lo = acc - lo;
+		borrow += lo > acc ? 1 : 0;
+		res_ptr[idx] = lo;
+	} while (++idx);
+
+	return borrow;
+}
diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c
new file mode 100644
index 000000000..e07bc69aa
--- /dev/null
+++ b/lib/mpi/generic_mpih-rshift.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpih-rshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MPI_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ * 3. USIZE must be at least 1; the lowest limb is read unconditionally.
+ */
+
+mpi_limb_t
+mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+	const unsigned rbits = cnt;
+	const unsigned lbits = BITS_PER_MPI_LIMB - cnt;
+	mpi_limb_t lower = up[0];
+	mpi_limb_t upper;
+	mpi_limb_t shifted_out = lower << lbits;
+	mpi_size_t i;
+
+	/* Walk from the least significant limb upwards so that an in-place
+	 * shift with WP <= UP never overwrites a limb before it is read.
+	 */
+	for (i = 1; i < usize; i++) {
+		upper = up[i];
+		wp[i - 1] = (lower >> rbits) | (upper << lbits);
+		lower = upper;
+	}
+	wp[i - 1] = lower >> rbits;
+
+	return shifted_out;
+}
diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c
new file mode 100644
index 000000000..eea4382aa
--- /dev/null
+++ b/lib/mpi/generic_mpih-sub1.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* mpihelp-add_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Subtract the SIZE-limb number at S2_PTR from the one at S1_PTR limb by
+ * limb, store the SIZE low limbs of the difference at RES_PTR and return
+ * the borrow out of the most significant limb.  The loop body runs
+ * before its exit test, so SIZE has to be at least 1.
+ */
+mpi_limb_t
+mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+	mpi_limb_t borrow = 0;
+	/* Index runs from -SIZE up to -1 so the loop can stop on zero. */
+	mpi_size_t idx = -size;
+
+	/* Bias the pointers so that the negative index lands on limb 0. */
+	s1_ptr -= idx;
+	s2_ptr -= idx;
+	res_ptr -= idx;
+
+	do {
+		mpi_limb_t minuend = s1_ptr[idx];
+		mpi_limb_t sub = s2_ptr[idx] + borrow;	/* fold in old borrow */
+		mpi_limb_t diff;
+
+		borrow = sub < borrow;		/* did folding the borrow wrap? */
+		diff = minuend - sub;
+		borrow += diff > minuend;	/* did the subtraction wrap? */
+		res_ptr[idx] = diff;
+	} while (++idx);
+
+	return borrow;
+}
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000..afbd99987
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1361 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+
+#include <linux/count_zeros.h>
+
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large a UWtype
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types. + * + * On a 32 bit machine UWtype should typically be USItype; + * on a 64 bit machine, UWtype should typically be UDItype. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +/* Define auxiliary asm macros. + * + * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two + * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype + * word product in HIGH_PROD and LOW_PROD. + * + * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + * UDWtype product. This is just a variant of umul_ppmm. + + * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator) divides a UDWtype, composed by the UWtype integers + * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + * than DENOMINATOR for correct operation. If, in addition, the most + * significant bit of DENOMINATOR must be 1, then the pre-processor symbol + * UDIV_NEEDS_NORMALIZATION is defined to 1. + * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator). Like udiv_qrnnd but the numbers are signed. The quotient + * is rounded towards 0. + * + * 5) count_leading_zeros(count, x) counts the number of zero-bits from the + * msb to the first non-zero bit in the UWtype X. This is the number of + * steps X needs to be shifted left to set the msb. Undefined for X == 0, + * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 
+ * + * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + * from the least significant end. + * + * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + * high_addend_2, low_addend_2) adds two UWtype integers, composed by + * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + * (i.e. carry out) is not stored anywhere, and is lost. + * + * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + * and is lost. + * + * If any of these macros are left undefined for a particular CPU, + * C macros are used. */ + +/* The CPUs come in alphabetical order below. + * + * Please add support for more CPUs here, or improve the current support + * for the CPUs below! */ + +#if defined(__GNUC__) && !defined(NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. 
*/ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +/*************************************** + ************** A29K ***************** + ***************************************/ +#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5\n" \ + "addc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5\n" \ + "subc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" ((USItype)(xh)) \ + : "r" (__m0), \ + "r" (__m1)); \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" ((USItype)(q)), \ + "=q" ((USItype)(r)) \ + : "1" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))) +#endif /* __a29k__ */ + +#if defined(__alpha) && W_TYPE_SIZE == 64 +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_alpha_umulh(__m0, __m1); \ + (pl) = __m0 * __m1; \ +} while (0) +#define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { UDItype __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha */ + +/*************************************** + ************** ARM 
****************** + ***************************************/ +#if defined(__arm__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n" \ + "adc %0, %2, %3" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5\n" \ + "sbc %0, %2, %3" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("@ Inlined umul_ppmm\n" \ + "mov %|r0, %2, lsr #16 @ AAAA\n" \ + "mov %|r2, %3, lsr #16 @ BBBB\n" \ + "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ + "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ + "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ + "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ + "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ + "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ + "adds %|r0, %1, %0 @ central sum\n" \ + "addcs %|r2, %|r2, #65536\n" \ + "adds %1, %|r1, %|r0, lsl #16\n" \ + "adc %0, %|r2, %|r0, lsr #16" \ + : "=&r" (xh), \ + "=r" (xl) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1", "r2") +#else +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("@ Inlined umul_ppmm\n" \ + "umull %1, %0, %2, %3" \ + : "=&r" (xh), \ + "=&r" (xl) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1") +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +/*************************************** + ************** CLIPPER ************** + ***************************************/ +#if defined(__clipper__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define 
smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((SItype)(u)), \ + "r" ((SItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + +/*************************************** + ************** GMICRO *************** + ***************************************/ +#if defined(__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1\n" \ + "addx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1\n" \ + "subx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%0" ((USItype)(m0)), \ + "g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "1" ((USItype)(nh)), \ + "0" ((USItype)(nl)), \ + "g" ((USItype)(d))) +#endif + +/*************************************** + ************** HPPA ***************** + ***************************************/ +#if defined(__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %4,%5,%1\n" \ + "addc %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "%rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %4,%5,%1\n" \ + "subb %2,%3,%0" \ + : "=r" 
((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#if 0 && defined(_PA_RISC1_1) +/* xmpyu uses floating point register which is not allowed in Linux kernel. */ +#define umul_ppmm(wh, wl, u, v) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("xmpyu %1,%2,%0" \ + : "=*f" (__xx.__ll) \ + : "*f" ((USItype)(u)), \ + "*f" ((USItype)(v))); \ + (wh) = __xx.__i.__h; \ + (wl) = __xx.__i.__l; \ +} while (0) +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#else +#define UMUL_TIME 40 +#define UDIV_TIME 80 +#endif +#if 0 /* #ifndef LONGLONG_STANDALONE */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { USItype __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern USItype __udiv_qrnnd(); +#endif /* LONGLONG_STANDALONE */ +#endif /* hppa */ + +/*************************************** + ************** I370 ***************** + ***************************************/ +#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "r" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define smul_ppmm(xh, xl, m0, m1) \ +do { \ + union {DItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (m0), \ + "r" (m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ + union {DItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" 
(__xx.__ll) \ + : "0" (__xx.__ll), "r" (d)); \ + (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ +} while (0) +#endif + +/*************************************** + ************** I386 ***************** + ***************************************/ +#undef __i386__ +#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%1\n" \ + "adcl %3,%0" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1\n" \ + "sbbl %3,%0" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" (w0), \ + "=d" (w1) \ + : "%0" ((USItype)(u)), \ + "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" (q), \ + "=d" (r) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "rm" ((USItype)(d))) +#ifndef UMUL_TIME +#define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 40 +#endif +#endif /* 80x86 */ + +/*************************************** + ************** I860 ***************** + ***************************************/ +#if defined(__i860__) && W_TYPE_SIZE == 32 +#define rshift_rhlc(r, h, l, c) \ + __asm__ ("shr %3,r0,r0\n" \ + "shrd %1,%2,%0" \ + "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +/*************************************** + ************** I960 ***************** + ***************************************/ +#if defined(__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\n" \ + "addc %5,%4,%1\n" \ + "addc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "%dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) 
\ + __asm__ ("cmpo 0,0\n" \ + "subc %5,%4,%1\n" \ + "subc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__xx.__ll) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__w) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __nn; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) \ + : "dI" (__nn.__ll), \ + "dI" ((USItype)(d))); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ +} while (0) +#if defined(__i960mx) /* what is the proper symbol to test??? 
*/ +#define rshift_rhlc(r, h, l, c) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" \ + : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ +} +#endif /* i960mx */ +#endif /* i960 */ + +/*************************************** + ************** 68000 **************** + ***************************************/ +#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n" \ + "addx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n" \ + "subx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "dmi" ((USItype)(v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#else /* not mc68020 */ +#define umul_ppmm(xh, xl, a, b) \ +do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm\n" \ + "move%.l %5,%3\n" \ + "move%.l %2,%0\n" \ + "move%.w %3,%1\n" \ + "swap %3\n" \ + "swap %0\n" \ + "mulu %2,%1\n" \ + "mulu %3,%0\n" \ + "mulu %2,%3\n" \ 
+ "swap %2\n" \ + "mulu %5,%2\n" \ + "add%.l %3,%2\n" \ + "jcc 1f\n" \ + "add%.l %#0x10000,%0\n" \ + "1: move%.l %2,%3\n" \ + "clr%.w %2\n" \ + "swap %2\n" \ + "swap %3\n" \ + "clr%.w %3\n" \ + "add%.l %3,%1\n" \ + "addx%.l %2,%0\n" \ + "| End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ +} while (0) +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#endif /* not mc68020 */ +#endif /* mc68000 */ + +/*************************************** + ************** 88000 **************** + ***************************************/ +#if defined(__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n" \ + "addu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5\n" \ + "subu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#if defined(__m88110__) +#define umul_ppmm(wh, wl, u, v) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +/*************************************** + ************** MIPS 
***************** + ***************************************/ +#if defined(__mips__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ +do { \ + UDItype __ll = (UDItype)(u) * (v); \ + w1 = __ll >> 32; \ + w0 = __ll; \ +} while (0) +#define UMUL_TIME 10 +#define UDIV_TIME 100 +#endif /* __mips__ */ + +/*************************************** + ************** MIPS/64 ************** + ***************************************/ +#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 && defined(CONFIG_CC_IS_GCC) +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. + */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ + : "=d" ((UDItype)(w0)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ + : "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#else +#define umul_ppmm(w1, w0, u, v) \ +do { \ + typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ + __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ + w1 = __ll >> 64; \ + w0 = __ll; \ +} while (0) +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 +#endif /* __mips__ */ + +/*************************************** + ************** 32000 **************** + ***************************************/ +#if defined(__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("meid %2,%0" \ + : "=g" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; 
\ + __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__xx.__ll) \ + : "0" (__xx.__ll), \ + "g" ((USItype)(d))); \ + (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) +#endif /* __ns32000__ */ + +/*************************************** + ************** PPC ****************** + ***************************************/ +#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ + if (__builtin_constant_p(bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "%r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ +} while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ + if (__builtin_constant_p(ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} 
%0,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ +} while (0) +#if defined(_ARCH_PPC) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" (ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ +} while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ +do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" \ + : "=r" ((SItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ +} while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#else +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((USItype)(xh)), \ + "=q" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 8 +#define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((SItype)(xh)), \ + "=q" ((SItype)(xl)) \ + : "r" (m0), \ + "r" (m1)) +#define SMUL_TIME 4 +#define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" \ + : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ + : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) +#define UDIV_TIME 100 +#endif +#endif /* Power architecture variants. 
*/ + +/*************************************** + ************** PYR ****************** + ***************************************/ +#if defined(__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1\n" \ + "addwc %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1\n" \ + "subwb %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) + /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("movw %1,%R0\n" \ + "uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#endif /* __pyr__ */ + +/*************************************** + ************** RT/ROMP ************** + ***************************************/ +#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5\n" \ + "ae %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n" \ + "se %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ( \ + "s r2,r2\n" \ + "mts r10,%2\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m 
r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "cas %0,r2,r0\n" \ + "mfs r10,%1" \ + : "=r" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%r" (__m0), \ + "r" (__m1) \ + : "r2"); \ + (ph) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#endif /* RT/ROMP */ + +/*************************************** + ************** SH2 ****************** + ***************************************/ +#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ + && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3\n" \ + "sts macl,%1\n" \ + "sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + +/*************************************** + ************** SPARC **************** + ***************************************/ +#if defined(__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1\n" \ + "addx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n" \ + "subx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#if defined(__sparc_v8__) +/* Don't match immediate range because, 1) it is not often useful, + 2) the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. 
*/ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ +} while (0) +#define UDIV_TIME 25 +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ +#if defined(__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ + "wr %%g0,%2,%%y ! 
Not a delayed write for sparclite\n" \ + "tst %%g0\n" \ + "divscc %3,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%0\n" \ + "rd %%y,%1\n" \ + "bl,a 1f\n" \ + "add %1,%4,%1\n" \ + "1: ! End of inline udiv_qrnnd" \ + : "=r" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "rI" ((USItype)(d)) \ + : "%g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ + /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm\n" \ + "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ + "sra %3,31,%%g2 ! Don't move this insn\n" \ + "and %2,%%g2,%%g2 ! Don't move this insn\n" \ + "andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,0,%%g1\n" \ + "add %%g1,%%g2,%0\n" \ + "rd %%y,%1" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "%rI" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +#define UMUL_TIME 39 /* 39 instructions */ +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n\t" \ + "mov 32,%%g1\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "1: bcs 5f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "addx %1,%1,%1 ! so this can't give carry\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "2: bne 1b\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "bcs 3f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "b 3f\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "4: sub %1,%2,%1\n\t" \ + "5: addxcc %1,%1,%1\n\t" \ + "bcc 2b\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "! Got carry from n. 
Subtract next step to cancel this carry.\n\t" \ + "bne 4b\n\t" \ + "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ + "sub %1,%2,%1\n\t" \ + "3: xnor %0,0,%0\n\t" \ + "! End of inline udiv_qrnnd\n" \ + : "=&r" ((USItype)(q)), \ + "=&r" ((USItype)(r)) \ + : "r" ((USItype)(d)), \ + "1" ((USItype)(n1)), \ + "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ +#endif +#endif /* __sparc__ */ + +/*************************************** + ************** VAX ****************** + ***************************************/ +#if defined(__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n" \ + "adwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n" \ + "sbwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__xx.__ll) \ + : "g" (__m0), \ + "g" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ + union {DItype __ll; \ + struct {SItype __l, __h; } __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) \ + : "g" (__xx.__ll), "g" (d)); \ +} while (0) +#endif /* __vax__ */ + +/*************************************** + ************** Z8000 **************** + ***************************************/ +#if defined(__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, 
al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {long int __ll; \ + struct {unsigned int __h, __l; } __i; \ + } __xx; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "rQR" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ +} while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + +/*************************************** + *********** Generic Versions ******** + ***************************************/ +#if !defined(umul_ppmm) && defined(__umulsidi3) +#define umul_ppmm(ph, pl, m0, m1) \ +{ \ + UDWtype __ll = __umulsidi3(m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ +} +#endif + +#if !defined(__umulsidi3) +#define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + umul_ppmm(__hi, __lo, u, v); \ + ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + + /* If this machine has no inline assembler, use C macros. 
*/ + +#if !defined(add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ + UWtype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ +} while (0) +#endif + +#if !defined(sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ +} while (0) +#endif + +#if !defined(umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ +do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ + \ + __ul = __ll_lowpart(__u); \ + __uh = __ll_highpart(__u); \ + __vl = __ll_lowpart(__v); \ + __vh = __ll_highpart(__v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart(__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart(__x1); \ + (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ +} while (0) +#endif + +#if !defined(umul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ +do { \ + UWtype __w1; \ + UWtype __m0 = (u), __m1 = (v); \ + umul_ppmm(__w1, w0, __m0, __m1); \ + (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ + - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ +} while (0) +#endif + + /* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ +do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = __ll_highpart(d); \ + __d0 = __ll_lowpart(d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart(n0); \ + if (__r1 < __m) { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. 
we didn't get carry when adding to __r1 */ \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ + if (__r0 < __m) { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ +} while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through + __udiv_w_sdiv (defined in libgcc or elsewhere). */ +#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ + UWtype __r; \ + (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ + (r) = __r; \ +} while (0) +#endif + + /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined(udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c new file mode 100644 index 000000000..2cdae54c1 --- /dev/null +++ b/lib/mpi/mpi-add.c @@ -0,0 +1,155 @@ +/* mpi-add.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include "mpi-internal.h" + +/**************** + * Add the unsigned integer V to the mpi-integer U and store the + * result in W. U and V may be the same. 
+ */ +void mpi_add_ui(MPI w, MPI u, unsigned long v) +{ + mpi_ptr_t wp, up; + mpi_size_t usize, wsize; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + wsign = 0; + + /* If not space for W (and possible carry), increase space. */ + wsize = usize + 1; + if (w->alloced < wsize) + mpi_resize(w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->d; + wp = w->d; + + if (!usize) { /* simple */ + wp[0] = v; + wsize = v ? 1:0; + } else if (!usign) { /* mpi is not negative */ + mpi_limb_t cy; + cy = mpihelp_add_1(wp, up, usize, v); + wp[usize] = cy; + wsize = usize + cy; + } else { + /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. + */ + if (usize == 1 && up[0] < v) { + wp[0] = v - up[0]; + wsize = 1; + } else { + mpihelp_sub_1(wp, up, usize, v); + /* Size can decrease with at most one limb. */ + wsize = usize - (wp[usize-1] == 0); + wsign = 1; + } + } + + w->nlimbs = wsize; + w->sign = wsign; +} + + +void mpi_add(MPI w, MPI u, MPI v) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t usize, vsize, wsize; + int usign, vsign, wsign; + + if (u->nlimbs < v->nlimbs) { /* Swap U and V. */ + usize = v->nlimbs; + usign = v->sign; + vsize = u->nlimbs; + vsign = u->sign; + wsize = usize + 1; + RESIZE_IF_NEEDED(w, wsize); + /* These must be after realloc (u or v may be the same as w). */ + up = v->d; + vp = u->d; + } else { + usize = u->nlimbs; + usign = u->sign; + vsize = v->nlimbs; + vsign = v->sign; + wsize = usize + 1; + RESIZE_IF_NEEDED(w, wsize); + /* These must be after realloc (u or v may be the same as w). 
*/ + up = u->d; + vp = v->d; + } + wp = w->d; + wsign = 0; + + if (!vsize) { /* simple */ + MPN_COPY(wp, up, usize); + wsize = usize; + wsign = usign; + } else if (usign != vsign) { /* different sign */ + /* This test is right since USIZE >= VSIZE */ + if (usize != vsize) { + mpihelp_sub(wp, up, usize, vp, vsize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + wsign = usign; + } else if (mpihelp_cmp(up, vp, usize) < 0) { + mpihelp_sub_n(wp, vp, up, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if (!usign) + wsign = 1; + } else { + mpihelp_sub_n(wp, up, vp, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if (usign) + wsign = 1; + } + } else { /* U and V have same sign. Add them. */ + mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize); + wp[usize] = cy; + wsize = usize + cy; + if (usign) + wsign = 1; + } + + w->nlimbs = wsize; + w->sign = wsign; +} +EXPORT_SYMBOL_GPL(mpi_add); + +void mpi_sub(MPI w, MPI u, MPI v) +{ + MPI vv = mpi_copy(v); + vv->sign = !vv->sign; + mpi_add(w, u, vv); + mpi_free(vv); +} + + +void mpi_addm(MPI w, MPI u, MPI v, MPI m) +{ + mpi_add(w, u, v); + mpi_mod(w, w, m); +} +EXPORT_SYMBOL_GPL(mpi_addm); + +void mpi_subm(MPI w, MPI u, MPI v, MPI m) +{ + mpi_sub(w, u, v); + mpi_mod(w, w, m); +} +EXPORT_SYMBOL_GPL(mpi_subm); diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c new file mode 100644 index 000000000..142b68083 --- /dev/null +++ b/lib/mpi/mpi-bit.c @@ -0,0 +1,307 @@ +/* mpi-bit.c - MPI bit level functions + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#define A_LIMB_1 ((mpi_limb_t) 1) + +/**************** + * Sometimes we have MSL (most significant limbs) which are 0; + * this is for some reasons not good, so this function removes them. + */ +void mpi_normalize(MPI a) +{ + for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--) + ; +} +EXPORT_SYMBOL_GPL(mpi_normalize); + +/**************** + * Return the number of bits in A. + */ +unsigned mpi_get_nbits(MPI a) +{ + unsigned n; + + mpi_normalize(a); + + if (a->nlimbs) { + mpi_limb_t alimb = a->d[a->nlimbs - 1]; + if (alimb) + n = count_leading_zeros(alimb); + else + n = BITS_PER_MPI_LIMB; + n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB; + } else + n = 0; + return n; +} +EXPORT_SYMBOL_GPL(mpi_get_nbits); + +/**************** + * Test whether bit N is set. + */ +int mpi_test_bit(MPI a, unsigned int n) +{ + unsigned int limbno, bitno; + mpi_limb_t limb; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return 0; /* too far left: this is a 0 */ + limb = a->d[limbno]; + return (limb & (A_LIMB_1 << bitno)) ? 1 : 0; +} +EXPORT_SYMBOL_GPL(mpi_test_bit); + +/**************** + * Set bit N of A. 
+ */ +void mpi_set_bit(MPI a, unsigned int n) +{ + unsigned int i, limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) { + for (i = a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + mpi_resize(a, limbno+1); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (A_LIMB_1<<bitno); +} + +/**************** + * Set bit N of A. and clear all bits above + */ +void mpi_set_highbit(MPI a, unsigned int n) +{ + unsigned int i, limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) { + for (i = a->nlimbs; i < a->alloced; i++) + a->d[i] = 0; + mpi_resize(a, limbno+1); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (A_LIMB_1<<bitno); + for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++) + a->d[limbno] &= ~(A_LIMB_1 << bitno); + a->nlimbs = limbno+1; +} +EXPORT_SYMBOL_GPL(mpi_set_highbit); + +/**************** + * clear bit N of A and all bits above + */ +void mpi_clear_highbit(MPI a, unsigned int n) +{ + unsigned int limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return; /* not allocated, therefore no need to clear bits :-) */ + + for ( ; bitno < BITS_PER_MPI_LIMB; bitno++) + a->d[limbno] &= ~(A_LIMB_1 << bitno); + a->nlimbs = limbno+1; +} + +/**************** + * Clear bit N of A. + */ +void mpi_clear_bit(MPI a, unsigned int n) +{ + unsigned int limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if (limbno >= a->nlimbs) + return; /* Don't need to clear this bit, it's far too left. 
*/ + a->d[limbno] &= ~(A_LIMB_1 << bitno); +} +EXPORT_SYMBOL_GPL(mpi_clear_bit); + + +/**************** + * Shift A by COUNT limbs to the right + * This is used only within the MPI library + */ +void mpi_rshift_limbs(MPI a, unsigned int count) +{ + mpi_ptr_t ap = a->d; + mpi_size_t n = a->nlimbs; + unsigned int i; + + if (count >= n) { + a->nlimbs = 0; + return; + } + + for (i = 0; i < n - count; i++) + ap[i] = ap[i+count]; + ap[i] = 0; + a->nlimbs -= count; +} + +/* + * Shift A by N bits to the right. + */ +void mpi_rshift(MPI x, MPI a, unsigned int n) +{ + mpi_size_t xsize; + unsigned int i; + unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); + unsigned int nbits = (n%BITS_PER_MPI_LIMB); + + if (x == a) { + /* In-place operation. */ + if (nlimbs >= x->nlimbs) { + x->nlimbs = 0; + return; + } + + if (nlimbs) { + for (i = 0; i < x->nlimbs - nlimbs; i++) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; + } + if (x->nlimbs && nbits) + mpihelp_rshift(x->d, x->d, x->nlimbs, nbits); + } else if (nlimbs) { + /* Copy and shift by more or equal bits than in a limb. */ + xsize = a->nlimbs; + x->sign = a->sign; + RESIZE_IF_NEEDED(x, xsize); + x->nlimbs = xsize; + for (i = 0; i < a->nlimbs; i++) + x->d[i] = a->d[i]; + x->nlimbs = i; + + if (nlimbs >= x->nlimbs) { + x->nlimbs = 0; + return; + } + + if (nlimbs) { + for (i = 0; i < x->nlimbs - nlimbs; i++) + x->d[i] = x->d[i+nlimbs]; + x->d[i] = 0; + x->nlimbs -= nlimbs; + } + + if (x->nlimbs && nbits) + mpihelp_rshift(x->d, x->d, x->nlimbs, nbits); + } else { + /* Copy and shift by less than bits in a limb. */ + xsize = a->nlimbs; + x->sign = a->sign; + RESIZE_IF_NEEDED(x, xsize); + x->nlimbs = xsize; + + if (xsize) { + if (nbits) + mpihelp_rshift(x->d, a->d, x->nlimbs, nbits); + else { + /* The rshift helper function is not specified for + * NBITS==0, thus we do a plain copy here. 
+ */ + for (i = 0; i < x->nlimbs; i++) + x->d[i] = a->d[i]; + } + } + } + MPN_NORMALIZE(x->d, x->nlimbs); +} + +/**************** + * Shift A by COUNT limbs to the left + * This is used only within the MPI library + */ +void mpi_lshift_limbs(MPI a, unsigned int count) +{ + mpi_ptr_t ap; + int n = a->nlimbs; + int i; + + if (!count || !n) + return; + + RESIZE_IF_NEEDED(a, n+count); + + ap = a->d; + for (i = n-1; i >= 0; i--) + ap[i+count] = ap[i]; + for (i = 0; i < count; i++) + ap[i] = 0; + a->nlimbs += count; +} + +/* + * Shift A by N bits to the left. + */ +void mpi_lshift(MPI x, MPI a, unsigned int n) +{ + unsigned int nlimbs = (n/BITS_PER_MPI_LIMB); + unsigned int nbits = (n%BITS_PER_MPI_LIMB); + + if (x == a && !n) + return; /* In-place shift with an amount of zero. */ + + if (x != a) { + /* Copy A to X. */ + unsigned int alimbs = a->nlimbs; + int asign = a->sign; + mpi_ptr_t xp, ap; + + RESIZE_IF_NEEDED(x, alimbs+nlimbs+1); + xp = x->d; + ap = a->d; + MPN_COPY(xp, ap, alimbs); + x->nlimbs = alimbs; + x->flags = a->flags; + x->sign = asign; + } + + if (nlimbs && !nbits) { + /* Shift a full number of limbs. */ + mpi_lshift_limbs(x, nlimbs); + } else if (n) { + /* We use a very dump approach: Shift left by the number of + * limbs plus one and than fix it up by an rshift. + */ + mpi_lshift_limbs(x, nlimbs+1); + mpi_rshift(x, x, BITS_PER_MPI_LIMB - nbits); + } + + MPN_NORMALIZE(x->d, x->nlimbs); +} diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c new file mode 100644 index 000000000..0835b6213 --- /dev/null +++ b/lib/mpi/mpi-cmp.c @@ -0,0 +1,98 @@ +/* mpi-cmp.c - MPI functions + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +int mpi_cmp_ui(MPI u, unsigned long v) +{ + mpi_limb_t limb = v; + + mpi_normalize(u); + if (u->nlimbs == 0) { + if (v == 0) + return 0; + else + return -1; + } + if (u->sign) + return -1; + if (u->nlimbs > 1) + return 1; + + if (u->d[0] == limb) + return 0; + else if (u->d[0] > limb) + return 1; + else + return -1; +} +EXPORT_SYMBOL_GPL(mpi_cmp_ui); + +static int do_mpi_cmp(MPI u, MPI v, int absmode) +{ + mpi_size_t usize; + mpi_size_t vsize; + int usign; + int vsign; + int cmp; + + mpi_normalize(u); + mpi_normalize(v); + + usize = u->nlimbs; + vsize = v->nlimbs; + usign = absmode ? 0 : u->sign; + vsign = absmode ? 0 : v->sign; + + /* Compare sign bits. */ + + if (!usign && vsign) + return 1; + if (usign && !vsign) + return -1; + + /* U and V are either both positive or both negative. 
*/ + + if (usize != vsize && !usign && !vsign) + return usize - vsize; + if (usize != vsize && usign && vsign) + return vsize + usize; + if (!usize) + return 0; + cmp = mpihelp_cmp(u->d, v->d, usize); + if (!cmp) + return 0; + if ((cmp < 0?1:0) == (usign?1:0)) + return 1; + + return -1; +} + +int mpi_cmp(MPI u, MPI v) +{ + return do_mpi_cmp(u, v, 0); +} +EXPORT_SYMBOL_GPL(mpi_cmp); + +int mpi_cmpabs(MPI u, MPI v) +{ + return do_mpi_cmp(u, v, 1); +} +EXPORT_SYMBOL_GPL(mpi_cmpabs); diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c new file mode 100644 index 000000000..45beab8b9 --- /dev/null +++ b/lib/mpi/mpi-div.c @@ -0,0 +1,234 @@ +/* mpi-div.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den); +void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor); + +void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + /* We need the original value of the divisor after the remainder has been + * preliminary calculated. We have to copy it to temporary space if it's + * the same variable as REM. 
+ */ + if (rem == divisor) { + temp_divisor = mpi_copy(divisor); + divisor = temp_divisor; + } + + mpi_tdiv_r(rem, dividend, divisor); + + if (((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs) + mpi_add(rem, rem, divisor); + + if (temp_divisor) + mpi_free(temp_divisor); +} + +void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor) +{ + MPI tmp = mpi_alloc(mpi_get_nlimbs(quot)); + mpi_fdiv_qr(quot, tmp, dividend, divisor); + mpi_free(tmp); +} + +void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + if (quot == divisor || rem == divisor) { + temp_divisor = mpi_copy(divisor); + divisor = temp_divisor; + } + + mpi_tdiv_qr(quot, rem, dividend, divisor); + + if ((divisor_sign ^ dividend->sign) && rem->nlimbs) { + mpi_sub_ui(quot, quot, 1); + mpi_add(rem, rem, divisor); + } + + if (temp_divisor) + mpi_free(temp_divisor); +} + +/* If den == quot, den needs temporary storage. + * If den == rem, den needs temporary storage. + * If num == quot, num needs temporary storage. + * If den has temporary storage, it can be normalized while being copied, + * i.e no extra storage should be allocated. + */ + +void mpi_tdiv_r(MPI rem, MPI num, MPI den) +{ + mpi_tdiv_qr(NULL, rem, num, den); +} + +void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) +{ + mpi_ptr_t np, dp; + mpi_ptr_t qp, rp; + mpi_size_t nsize = num->nlimbs; + mpi_size_t dsize = den->nlimbs; + mpi_size_t qsize, rsize; + mpi_size_t sign_remainder = num->sign; + mpi_size_t sign_quotient = num->sign ^ den->sign; + unsigned int normalization_steps; + mpi_limb_t q_limb; + mpi_ptr_t marker[5]; + int markidx = 0; + + /* Ensure space is enough for quotient and remainder. + * We need space for an extra limb in the remainder, because it's + * up-shifted (normalized) below. + */ + rsize = nsize + 1; + mpi_resize(rem, rsize); + + qsize = rsize - dsize; /* qsize cannot be bigger than this. 
*/ + if (qsize <= 0) { + if (num != rem) { + rem->nlimbs = num->nlimbs; + rem->sign = num->sign; + MPN_COPY(rem->d, num->d, nsize); + } + if (quot) { + /* This needs to follow the assignment to rem, in case the + * numerator and quotient are the same. + */ + quot->nlimbs = 0; + quot->sign = 0; + } + return; + } + + if (quot) + mpi_resize(quot, qsize); + + /* Read pointers here, when reallocation is finished. */ + np = num->d; + dp = den->d; + rp = rem->d; + + /* Optimize division by a single-limb divisor. */ + if (dsize == 1) { + mpi_limb_t rlimb; + if (quot) { + qp = quot->d; + rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]); + qsize -= qp[qsize - 1] == 0; + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } else + rlimb = mpihelp_mod_1(np, nsize, dp[0]); + rp[0] = rlimb; + rsize = rlimb != 0?1:0; + rem->nlimbs = rsize; + rem->sign = sign_remainder; + return; + } + + + if (quot) { + qp = quot->d; + /* Make sure QP and NP point to different objects. Otherwise the + * numerator would be gradually overwritten by the quotient limbs. + */ + if (qp == np) { /* Copy NP object to temporary space. */ + np = marker[markidx++] = mpi_alloc_limb_space(nsize); + MPN_COPY(np, qp, nsize); + } + } else /* Put quotient at top of remainder. */ + qp = rp + dsize; + + normalization_steps = count_leading_zeros(dp[dsize - 1]); + + /* Normalize the denominator, i.e. make its most significant bit set by + * shifting it NORMALIZATION_STEPS bits to the left. Also shift the + * numerator the same number of steps (to keep the quotient the same!). + */ + if (normalization_steps) { + mpi_ptr_t tp; + mpi_limb_t nlimb; + + /* Shift up the denominator setting the most significant bit of + * the most significant word. Use temporary storage not to clobber + * the original contents of the denominator. 
+ */ + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + mpihelp_lshift(tp, dp, dsize, normalization_steps); + dp = tp; + + /* Shift up the numerator, possibly introducing a new most + * significant word. Move the shifted numerator in the remainder + * meanwhile. + */ + nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps); + if (nlimb) { + rp[nsize] = nlimb; + rsize = nsize + 1; + } else + rsize = nsize; + } else { + /* The denominator is already normalized, as required. Copy it to + * temporary space if it overlaps with the quotient or remainder. + */ + if (dp == rp || (quot && (dp == qp))) { + mpi_ptr_t tp; + + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + MPN_COPY(tp, dp, dsize); + dp = tp; + } + + /* Move the numerator to the remainder. */ + if (rp != np) + MPN_COPY(rp, np, nsize); + + rsize = nsize; + } + + q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize); + + if (quot) { + qsize = rsize - dsize; + if (q_limb) { + qp[qsize] = q_limb; + qsize += 1; + } + + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + + rsize = dsize; + MPN_NORMALIZE(rp, rsize); + + if (normalization_steps && rsize) { + mpihelp_rshift(rp, rp, rsize, normalization_steps); + rsize -= rp[rsize - 1] == 0?1:0; + } + + rem->nlimbs = rsize; + rem->sign = sign_remainder; + while (markidx) { + markidx--; + mpi_free_limb_space(marker[markidx]); + } +} diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h new file mode 100644 index 000000000..980b6b940 --- /dev/null +++ b/lib/mpi/mpi-inline.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* mpi-inline.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * Note: This code is heavily based on the GNU MP Library. 
+ * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INLINE_H +#define G10_MPI_INLINE_H + +#ifndef G10_MPI_INLINE_DECL +#define G10_MPI_INLINE_DECL static inline +#endif + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb += x; + *res_ptr++ = s2_limb; + if (s2_limb < x) { /* sum is less than the left operand: handle carry */ + while (--s1_size) { + x = *s1_ptr++ + 1; /* add carry */ + *res_ptr++ = x; /* and store */ + if (x) /* not 0 (no overflow): we can stop */ + goto leave; + } + return 1; /* return carry (size of s1 to small) */ + } + +leave: + if (res_ptr != s1_ptr) { /* not the same variable */ + mpi_size_t i; /* copy the rest */ + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; /* no carry */ +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if (s2_size) + cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size) + cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb = x - s2_limb; + *res_ptr++ = s2_limb; + if (s2_limb > x) { + while (--s1_size) { + x = *s1_ptr++; + *res_ptr++ = x - 1; + if (x) + goto leave; + } + return 1; + } + +leave: + if (res_ptr != s1_ptr) { + mpi_size_t i; + for (i = 0; i < s1_size - 1; 
i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if (s2_size) + cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size) + cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + +#endif /*G10_MPI_INLINE_H */ diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h new file mode 100644 index 000000000..554002182 --- /dev/null +++ b/lib/mpi/mpi-internal.h @@ -0,0 +1,232 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* mpi-internal.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INTERNAL_H +#define G10_MPI_INTERNAL_H + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/mpi.h> +#include <linux/errno.h> + +#define log_debug printk +#define log_bug printk + +#define assert(x) \ + do { \ + if (!x) \ + log_bug("failed assertion\n"); \ + } while (0); + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines. */ + +/* tested 4, 16, 32 and 64, where 16 gave the best performance when + * checking a 768 and a 1024 bit ElGamal signature. 
/* Operand size in limbs below which MPN_MUL_N_RECURSE uses the basecase
 * multiplication instead of recursing.  May be predefined by the build.
 */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 16
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif

typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
typedef int mpi_size_t;		/* (must be a signed type) */

/* Grow A to B limbs if it has fewer allocated.  The result of
 * mpi_resize() is discarded, so callers cannot see a failed resize.
 */
#define RESIZE_IF_NEEDED(a, b)			\
	do {					\
		if ((a)->alloced < (b))		\
			mpi_resize((a), (b));	\
	} while (0)

/* Copy N limbs from S to D.  */
#define MPN_COPY(d, s, n) \
	do {					\
		mpi_size_t _i;			\
		for (_i = 0; _i < (n); _i++)	\
			(d)[_i] = (s)[_i];	\
	} while (0)

/* Copy N limbs from S to D, lowest index first. */
#define MPN_COPY_INCR(d, s, n)		\
	do {					\
		mpi_size_t _i;			\
		for (_i = 0; _i < (n); _i++)	\
			(d)[_i] = (s)[_i];	\
	} while (0)


/* Copy N limbs from S to D, highest index first. */
#define MPN_COPY_DECR(d, s, n) \
	do {					\
		mpi_size_t _i;			\
		for (_i = (n)-1; _i >= 0; _i--) \
			(d)[_i] = (s)[_i];	\
	} while (0)

/* Zero N limbs at D */
#define MPN_ZERO(d, n) \
	do {					\
		int  _i;			\
		for (_i = 0; _i < (n); _i++)	\
			(d)[_i] = 0;		\
	} while (0)

/* Decrement N until limb D[N-1] is non-zero or N reaches 0.
 * N is modified in place and must therefore be an lvalue.
 */
#define MPN_NORMALIZE(d, n)  \
	do {					\
		while ((n) > 0) {		\
			if ((d)[(n)-1])		\
				break;		\
			(n)--;			\
		}				\
	} while (0)

/* Multiply two SIZE-limb operands: basecase below KARATSUBA_THRESHOLD,
 * mul_n() otherwise.  mul_n_basecase() and mul_n() must be visible where
 * the macro is used.
 * NOTE(review): the definition ends in "while (0);", so an invocation
 * written with its own semicolon expands to an extra empty statement.
 */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
	do {							\
		if ((size) < KARATSUBA_THRESHOLD)		\
			mul_n_basecase(prodp, up, vp, size);	\
		else						\
			mul_n(prodp, up, vp, size, tspace);	\
	} while (0);
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
 * limb not larger than (2**(2*BITS_PER_MPI_LIMB))/D - (2**BITS_PER_MPI_LIMB).
 * If this would yield overflow, DI should be the largest possible number
 * (i.e., only ones).  For correct operation, the most significant bit of D
 * has to be set.  Put the quotient in Q and the remainder in R.
 *
 * The quotient is first estimated from NH and DI and then corrected by
 * at most three subtractions of D.  Arguments other than Q and R are
 * evaluated more than once.
 */
#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di)				\
	do {								\
		mpi_limb_t _ql __maybe_unused;				\
		mpi_limb_t _q, _r;					\
		mpi_limb_t _xh, _xl;					\
		umul_ppmm(_q, _ql, (nh), (di));				\
		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
		umul_ppmm(_xh, _xl, _q, (d));				\
		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
		if (_xh) {						\
			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
			_q++;						\
			if (_xh) {					\
				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
				_q++;					\
			}						\
		}							\
		if (_r >= (d)) {					\
			_r -= (d);					\
			_q++;						\
		}							\
		(r) = _r;						\
		(q) = _q;						\
	} while (0)


/*-- mpiutil.c --*/
mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
void mpi_free_limb_space(mpi_ptr_t a);
void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);

/* Limb-vector addition; the static inline ones are defined in mpi-inline.h. */
static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			 mpi_size_t s1_size, mpi_limb_t s2_limb);
mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			 mpi_ptr_t s2_ptr, mpi_size_t size);
static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);

/* Limb-vector subtraction; the static inline ones are defined in mpi-inline.h. */
static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			 mpi_size_t s1_size, mpi_limb_t s2_limb);
mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			 mpi_ptr_t s2_ptr, mpi_size_t size);
static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);

/*-- mpih-cmp.c --*/
int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);

/*-- mpih-mul.c --*/

/* Scratch buffers for Karatsuba multiplication, chained through NEXT.
 * Released with mpihelp_release_karatsuba_ctx().
 */
struct karatsuba_ctx {
	struct karatsuba_ctx *next;
	mpi_ptr_t tspace;
	mpi_size_t tspace_size;	/* presumably the limb count of TSPACE */
	mpi_ptr_t tp;
	mpi_size_t tp_size;	/* presumably the limb count of TP */
};

void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);

mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			    mpi_size_t s1_size, mpi_limb_t s2_limb);
mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			    mpi_size_t s1_size, mpi_limb_t s2_limb);
int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
		mpi_ptr_t tspace);
void mpihelp_mul_n(mpi_ptr_t prodp,
		mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);

int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
			       mpi_ptr_t up, mpi_size_t usize,
			       mpi_ptr_t vp, mpi_size_t vsize,
			       struct karatsuba_ctx *ctx);

/*-- generic_mpih-mul1.c --*/
mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
			 mpi_size_t s1_size, mpi_limb_t s2_limb);

/*-- mpih-div.c --*/
mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
			 mpi_limb_t divisor_limb);
mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
			  mpi_ptr_t np, mpi_size_t nsize,
			  mpi_ptr_t dp, mpi_size_t dsize);
mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
			    mpi_limb_t divisor_limb);

/*-- generic_mpih-[lr]shift.c --*/
mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
			  unsigned cnt);
mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
			  unsigned cnt);
*/ +#define W_TYPE_SIZE BITS_PER_MPI_LIMB +typedef mpi_limb_t UWtype; +typedef unsigned int UHWtype; +#if defined(__GNUC__) +typedef unsigned int UQItype __attribute__ ((mode(QI))); +typedef int SItype __attribute__ ((mode(SI))); +typedef unsigned int USItype __attribute__ ((mode(SI))); +typedef int DItype __attribute__ ((mode(DI))); +typedef unsigned int UDItype __attribute__ ((mode(DI))); +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; +#endif + +#ifdef __GNUC__ +#include "mpi-inline.h" +#endif + +#endif /*G10_MPI_INTERNAL_H */ diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c new file mode 100644 index 000000000..61e37d18f --- /dev/null +++ b/lib/mpi/mpi-inv.c @@ -0,0 +1,143 @@ +/* mpi-inv.c - MPI functions + * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
#include "mpi-internal.h"

/****************
 * Calculate the multiplicative inverse X of A mod N
 * That is: Find the solution x for
 *		1 = (a*x) mod n
 *
 * Returns 0 without touching X when A is 0 or N is 1, and 1 after
 * storing a result in X otherwise.
 * NOTE(review): no check is visible here that A and N are coprime, and
 * the results of mpi_copy(), mpi_alloc() and mpi_alloc_set_ui() are used
 * unchecked - confirm against the callers.
 */
int mpi_invm(MPI x, MPI a, MPI n)
{
	/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
	 * modified according to Michael Penk's solution for Exercise 35
	 * with further enhancement
	 */
	MPI u, v, u1, u2 = NULL, u3, v1, v2 = NULL, v3, t1, t2 = NULL, t3;
	unsigned int k;
	int sign;
	int odd;

	if (!mpi_cmp_ui(a, 0))
		return 0; /* Inverse does not exist.  */
	if (!mpi_cmp_ui(n, 1))
		return 0; /* Inverse does not exist.  */

	/* Work on copies so that A and N are not modified. */
	u = mpi_copy(a);
	v = mpi_copy(n);

	/* Strip common factors of two.  K counts them but is only
	 * referenced by the commented-out lshift near the end.
	 */
	for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) {
		mpi_rshift(u, u, 1);
		mpi_rshift(v, v, 1);
	}
	/* With an odd V the second coefficient row (u2/v2/t2) is not
	 * needed and is left as NULL.
	 */
	odd = mpi_test_bit(v, 0);

	u1 = mpi_alloc_set_ui(1);
	if (!odd)
		u2 = mpi_alloc_set_ui(0);
	u3 = mpi_copy(u);
	v1 = mpi_copy(v);
	if (!odd) {
		v2 = mpi_alloc(mpi_get_nlimbs(u));
		mpi_sub(v2, u1, u); /* U is used as const 1 */
	}
	v3 = mpi_copy(v);
	if (mpi_test_bit(u, 0)) { /* u is odd */
		t1 = mpi_alloc_set_ui(0);
		if (!odd) {
			t2 = mpi_alloc_set_ui(1);
			t2->sign = 1;
		}
		t3 = mpi_copy(v);
		t3->sign = !t3->sign;
		/* Enter the loop below at its test, skipping the first halving. */
		goto Y4;
	} else {
		t1 = mpi_alloc_set_ui(1);
		if (!odd)
			t2 = mpi_alloc_set_ui(0);
		t3 = mpi_copy(u);
	}

	do {
		do {
			/* Halve the t values until t3 is odd. */
			if (!odd) {
				if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) {
					/* one is odd */
					mpi_add(t1, t1, v);
					mpi_sub(t2, t2, u);
				}
				mpi_rshift(t1, t1, 1);
				mpi_rshift(t2, t2, 1);
				mpi_rshift(t3, t3, 1);
			} else {
				if (mpi_test_bit(t1, 0))
					mpi_add(t1, t1, v);
				mpi_rshift(t1, t1, 1);
				mpi_rshift(t3, t3, 1);
			}
Y4:
			;
		} while (!mpi_test_bit(t3, 0)); /* while t3 is even */

		if (!t3->sign) {
			/* t3 is non-negative: it replaces the u row. */
			mpi_set(u1, t1);
			if (!odd)
				mpi_set(u2, t2);
			mpi_set(u3, t3);
		} else {
			/* t3 is negative: the v row is rebuilt from it; the
			 * signs of u and t3 are flipped temporarily and restored.
			 */
			mpi_sub(v1, v, t1);
			sign = u->sign; u->sign = !u->sign;
			if (!odd)
				mpi_sub(v2, u, t2);
			u->sign = sign;
			sign = t3->sign; t3->sign = !t3->sign;
			mpi_set(v3, t3);
			t3->sign = sign;
		}
		mpi_sub(t1, u1, v1);
		if (!odd)
			mpi_sub(t2, u2, v2);
		mpi_sub(t3, u3, v3);
		if (t1->sign) {
			mpi_add(t1, t1, v);
			if (!odd)
				mpi_sub(t2, t2, u);
		}
	} while (mpi_cmp_ui(t3, 0)); /* while t3 != 0 */
	/* mpi_lshift( u3, k ); */
	mpi_set(x, u1);

	mpi_free(u1);
	mpi_free(v1);
	mpi_free(t1);
	if (!odd) {
		mpi_free(u2);
		mpi_free(v2);
		mpi_free(t2);
	}
	mpi_free(u3);
	mpi_free(v3);
	mpi_free(t3);

	mpi_free(u);
	mpi_free(v);
	return 1;
}
EXPORT_SYMBOL_GPL(mpi_invm);

/* mpi-mod.c -  Modular reduction
 * Copyright (C) 1998, 1999, 2001, 2002, 2003,
 *               2007  Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
 */


#include "mpi-internal.h"
#include "longlong.h"

/* Context used with Barrett reduction.  */
struct barrett_ctx_s {
	MPI m;   /* The modulus - may not be modified. */
	int m_copied;   /* If true, M needs to be released.  */
	int k;   /* Number of limbs of M, taken when the context is set up. */
	MPI y;   /* Precomputed floor(b^(2k) / m). */
	MPI r1;  /* Helper MPI. */
	MPI r2;  /* Helper MPI. */
	MPI r3;  /* Helper MPI allocated on demand. */
};



/* REM = DIVIDEND mod DIVISOR, using floor rounding (mpi_fdiv_r). */
void mpi_mod(MPI rem, MPI dividend, MPI divisor)
{
	mpi_fdiv_r(rem, dividend, divisor);
}
+ */ +mpi_barrett_t mpi_barrett_init(MPI m, int copy) +{ + mpi_barrett_t ctx; + MPI tmp; + + mpi_normalize(m); + ctx = kcalloc(1, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + if (copy) { + ctx->m = mpi_copy(m); + ctx->m_copied = 1; + } else + ctx->m = m; + + ctx->k = mpi_get_nlimbs(m); + tmp = mpi_alloc(ctx->k + 1); + + /* Barrett precalculation: y = floor(b^(2k) / m). */ + mpi_set_ui(tmp, 1); + mpi_lshift_limbs(tmp, 2 * ctx->k); + mpi_fdiv_q(tmp, tmp, m); + + ctx->y = tmp; + ctx->r1 = mpi_alloc(2 * ctx->k + 1); + ctx->r2 = mpi_alloc(2 * ctx->k + 1); + + return ctx; +} + +void mpi_barrett_free(mpi_barrett_t ctx) +{ + if (ctx) { + mpi_free(ctx->y); + mpi_free(ctx->r1); + mpi_free(ctx->r2); + if (ctx->r3) + mpi_free(ctx->r3); + if (ctx->m_copied) + mpi_free(ctx->m); + kfree(ctx); + } +} + + +/* R = X mod M + * + * Using Barrett reduction. Before using this function + * _gcry_mpi_barrett_init must have been called to do the + * precalculations. CTX is the context created by this precalculation + * and also conveys M. If the Barret reduction could no be done a + * straightforward reduction method is used. + * + * We assume that these conditions are met: + * Input: x =(x_2k-1 ...x_0)_b + * m =(m_k-1 ....m_0)_b with m_k-1 != 0 + * Output: r = x mod m + */ +void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx) +{ + MPI m = ctx->m; + int k = ctx->k; + MPI y = ctx->y; + MPI r1 = ctx->r1; + MPI r2 = ctx->r2; + int sign; + + mpi_normalize(x); + if (mpi_get_nlimbs(x) > 2*k) { + mpi_mod(r, x, m); + return; + } + + sign = x->sign; + x->sign = 0; + + /* 1. q1 = floor( x / b^k-1) + * q2 = q1 * y + * q3 = floor( q2 / b^k+1 ) + * Actually, we don't need qx, we can work direct on r2 + */ + mpi_set(r2, x); + mpi_rshift_limbs(r2, k-1); + mpi_mul(r2, r2, y); + mpi_rshift_limbs(r2, k+1); + + /* 2. r1 = x mod b^k+1 + * r2 = q3 * m mod b^k+1 + * r = r1 - r2 + * 3. if r < 0 then r = r + b^k+1 + */ + mpi_set(r1, x); + if (r1->nlimbs > k+1) /* Quick modulo operation. 
*/ + r1->nlimbs = k+1; + mpi_mul(r2, r2, m); + if (r2->nlimbs > k+1) /* Quick modulo operation. */ + r2->nlimbs = k+1; + mpi_sub(r, r1, r2); + + if (mpi_has_sign(r)) { + if (!ctx->r3) { + ctx->r3 = mpi_alloc(k + 2); + mpi_set_ui(ctx->r3, 1); + mpi_lshift_limbs(ctx->r3, k + 1); + } + mpi_add(r, r, ctx->r3); + } + + /* 4. while r >= m do r = r - m */ + while (mpi_cmp(r, m) >= 0) + mpi_sub(r, r, m); + + x->sign = sign; +} + + +void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx) +{ + mpi_mul(w, u, v); + mpi_mod_barrett(w, w, ctx); +} diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c new file mode 100644 index 000000000..8f5fa200f --- /dev/null +++ b/lib/mpi/mpi-mul.c @@ -0,0 +1,91 @@ +/* mpi-mul.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2001, 2002, + * 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + +#include "mpi-internal.h" + +void mpi_mul(MPI w, MPI u, MPI v) +{ + mpi_size_t usize, vsize, wsize; + mpi_ptr_t up, vp, wp; + mpi_limb_t cy; + int usign, vsign, sign_product; + int assign_wp = 0; + mpi_ptr_t tmp_limb = NULL; + + if (u->nlimbs < v->nlimbs) { + /* Swap U and V. */ + usize = v->nlimbs; + usign = v->sign; + up = v->d; + vsize = u->nlimbs; + vsign = u->sign; + vp = u->d; + } else { + usize = u->nlimbs; + usign = u->sign; + up = u->d; + vsize = v->nlimbs; + vsign = v->sign; + vp = v->d; + } + sign_product = usign ^ vsign; + wp = w->d; + + /* Ensure W has space enough to store the result. 
*/ + wsize = usize + vsize; + if (w->alloced < wsize) { + if (wp == up || wp == vp) { + wp = mpi_alloc_limb_space(wsize); + assign_wp = 1; + } else { + mpi_resize(w, wsize); + wp = w->d; + } + } else { /* Make U and V not overlap with W. */ + if (wp == up) { + /* W and U are identical. Allocate temporary space for U. */ + up = tmp_limb = mpi_alloc_limb_space(usize); + /* Is V identical too? Keep it identical with U. */ + if (wp == vp) + vp = up; + /* Copy to the temporary space. */ + MPN_COPY(up, wp, usize); + } else if (wp == vp) { + /* W and V are identical. Allocate temporary space for V. */ + vp = tmp_limb = mpi_alloc_limb_space(vsize); + /* Copy to the temporary space. */ + MPN_COPY(vp, wp, vsize); + } + } + + if (!vsize) + wsize = 0; + else { + mpihelp_mul(wp, up, usize, vp, vsize, &cy); + wsize -= cy ? 0:1; + } + + if (assign_wp) + mpi_assign_limb_space(w, wp, wsize); + w->nlimbs = wsize; + w->sign = sign_product; + if (tmp_limb) + mpi_free_limb_space(tmp_limb); +} + +void mpi_mulm(MPI w, MPI u, MPI v, MPI m) +{ + mpi_mul(w, u, v); + mpi_tdiv_r(w, w, m); +} +EXPORT_SYMBOL_GPL(mpi_mulm); diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c new file mode 100644 index 000000000..2fd7a46d5 --- /dev/null +++ b/lib/mpi/mpi-pow.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* mpi-pow.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
/****************
 * RES = BASE ^ EXP mod MOD
 *
 * Returns 0 on success, -EINVAL when MOD has no limbs, and -ENOMEM when a
 * limb allocation, an mpi_resize() call or one of the multiplication
 * helpers fails (rc starts out as -ENOMEM and is only cleared at "leave").
 *
 * All temporaries are tracked through the *_marker pointers and tspace so
 * that the common exit path can release them.
 */
int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
{
	mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
	struct karatsuba_ctx karactx = {};
	mpi_ptr_t xp_marker = NULL;
	mpi_ptr_t tspace = NULL;
	mpi_ptr_t rp, ep, mp, bp;
	mpi_size_t esize, msize, bsize, rsize;
	int msign, bsign, rsign;
	mpi_size_t size;
	int mod_shift_cnt;
	int negative_result;
	int assign_rp = 0;
	mpi_size_t tsize = 0;	/* to avoid compiler warning */
	/* fixme: we should check that the warning is void */
	int rc = -ENOMEM;

	esize = exp->nlimbs;
	msize = mod->nlimbs;
	size = 2 * msize;	/* working size: a product of two reduced values */
	msign = mod->sign;

	rp = res->d;
	ep = exp->d;

	/* A modulus without limbs is rejected; nothing has been allocated yet. */
	if (!msize)
		return -EINVAL;

	if (!esize) {
		/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
		 * depending on if MOD equals 1.  */
		res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
		if (res->nlimbs) {
			if (mpi_resize(res, 1) < 0)
				goto enomem;
			rp = res->d;
			rp[0] = 1;
		}
		res->sign = 0;
		goto leave;
	}

	/* Normalize MOD (i.e. make its most significant bit set) as required by
	 * mpn_divrem.  This will make the intermediate values in the calculation
	 * slightly larger, but the correct result is obtained after a final
	 * reduction using the original MOD value.	*/
	mp = mp_marker = mpi_alloc_limb_space(msize);
	if (!mp)
		goto enomem;
	mod_shift_cnt = count_leading_zeros(mod->d[msize - 1]);
	if (mod_shift_cnt)
		mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt);
	else
		MPN_COPY(mp, mod->d, msize);

	bsize = base->nlimbs;
	bsign = base->sign;
	if (bsize > msize) {	/* The base is larger than the module. Reduce it. */
		/* Allocate (BSIZE + 1) with space for remainder and quotient.
		 * (The quotient is (bsize - msize + 1) limbs.)  */
		bp = bp_marker = mpi_alloc_limb_space(bsize + 1);
		if (!bp)
			goto enomem;
		MPN_COPY(bp, base->d, bsize);
		/* We don't care about the quotient, store it above the remainder,
		 * at BP + MSIZE.  */
		mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize);
		bsize = msize;
		/* Canonicalize the base, since we are going to multiply with it
		 * quite a few times.  */
		MPN_NORMALIZE(bp, bsize);
	} else
		bp = base->d;

	/* A base with no limbs left gives a zero result. */
	if (!bsize) {
		res->nlimbs = 0;
		res->sign = 0;
		goto leave;
	}

	/*
	 * NOTE(review): mp always points at the private copy allocated above
	 * (mp == mp_marker), so the "rp == mp" tests below look unreachable
	 * unless res->d can equal a fresh allocation -- confirm.
	 */
	if (res->alloced < size) {
		/* We have to allocate more space for RES.  If any of the input
		 * parameters are identical to RES, defer deallocation of the old
		 * space.  */
		if (rp == ep || rp == mp || rp == bp) {
			rp = mpi_alloc_limb_space(size);
			if (!rp)
				goto enomem;
			assign_rp = 1;
		} else {
			if (mpi_resize(res, size) < 0)
				goto enomem;
			rp = res->d;
		}
	} else {		/* Make BASE, EXP and MOD not overlap with RES.  */
		if (rp == bp) {
			/* RES and BASE are identical.  Allocate temp. space for BASE.  */
			BUG_ON(bp_marker);
			bp = bp_marker = mpi_alloc_limb_space(bsize);
			if (!bp)
				goto enomem;
			MPN_COPY(bp, rp, bsize);
		}
		if (rp == ep) {
			/* RES and EXP are identical.  Allocate temp. space for EXP.  */
			ep = ep_marker = mpi_alloc_limb_space(esize);
			if (!ep)
				goto enomem;
			MPN_COPY(ep, rp, esize);
		}
		if (rp == mp) {
			/* RES and MOD are identical.  Allocate temporary space for MOD. */
			BUG_ON(mp_marker);
			mp = mp_marker = mpi_alloc_limb_space(msize);
			if (!mp)
				goto enomem;
			MPN_COPY(mp, rp, msize);
		}
	}

	/* The running result starts out as the (reduced) base. */
	MPN_COPY(rp, bp, bsize);
	rsize = bsize;
	rsign = bsign;

	{
		mpi_size_t i;
		mpi_ptr_t xp;
		int c;
		mpi_limb_t e;
		mpi_limb_t carry_limb;

		/* Scratch area that alternates roles with rp in the loop below. */
		xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
		if (!xp)
			goto enomem;

		/* Odd exponent and a signed base give a negative power. */
		negative_result = (ep[0] & 1) && base->sign;

		/* Start with the most significant exponent limb. */
		i = esize - 1;
		e = ep[i];
		c = count_leading_zeros(e);
		e = (e << c) << 1;	/* shift the exp bits to the left, lose msb */
		c = BITS_PER_MPI_LIMB - 1 - c;	/* exponent bits left in this limb */

		/* Main loop.
		 *
		 * Make the result be pointed to alternately by XP and RP.  This
		 * helps us avoid block copying, which would otherwise be necessary
		 * with the overlap restrictions of mpihelp_divmod. With 50% probability
		 * the result after this loop will be in the area originally pointed
		 * by RP (==RES->d), and with 50% probability in the area originally
		 * pointed to by XP.
		 */

		for (;;) {
			while (c) {
				mpi_ptr_t tp;
				mpi_size_t xsize;

				/* Square step: xp = rp * rp. */
				/*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */
				if (rsize < KARATSUBA_THRESHOLD)
					mpih_sqr_n_basecase(xp, rp, rsize);
				else {
					/* (Re)allocate the scratch space used by mpih_sqr_n(). */
					if (!tspace) {
						tsize = 2 * rsize;
						tspace =
						    mpi_alloc_limb_space(tsize);
						if (!tspace)
							goto enomem;
					} else if (tsize < (2 * rsize)) {
						mpi_free_limb_space(tspace);
						tsize = 2 * rsize;
						tspace =
						    mpi_alloc_limb_space(tsize);
						if (!tspace)
							goto enomem;
					}
					mpih_sqr_n(xp, rp, rsize, tspace);
				}

				/* Reduce the square modulo the normalized MOD. */
				xsize = 2 * rsize;
				if (xsize > msize) {
					mpihelp_divrem(xp + msize, 0, xp, xsize,
						       mp, msize);
					xsize = msize;
				}

				/* Swap roles: the reduced square is now the result. */
				tp = rp;
				rp = xp;
				xp = tp;
				rsize = xsize;

				/* Top bit of e set: this exponent bit is 1, multiply by BASE. */
				if ((mpi_limb_signed_t) e < 0) {
					/*mpihelp_mul( xp, rp, rsize, bp, bsize ); */
					if (bsize < KARATSUBA_THRESHOLD) {
						mpi_limb_t tmp;
						if (mpihelp_mul
						    (xp, rp, rsize, bp, bsize,
						     &tmp) < 0)
							goto enomem;
					} else {
						if (mpihelp_mul_karatsuba_case
						    (xp, rp, rsize, bp, bsize,
						     &karactx) < 0)
							goto enomem;
					}

					xsize = rsize + bsize;
					if (xsize > msize) {
						mpihelp_divrem(xp + msize, 0,
							       xp, xsize, mp,
							       msize);
						xsize = msize;
					}

					tp = rp;
					rp = xp;
					xp = tp;
					rsize = xsize;
				}
				e <<= 1;
				c--;
				/* Give the scheduler a chance after each exponent bit. */
				cond_resched();
			}

			/* Move on to the next lower exponent limb, if any. */
			i--;
			if (i < 0)
				break;
			e = ep[i];
			c = BITS_PER_MPI_LIMB;
		}

		/* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
		 * steps.  Adjust the result by reducing it with the original MOD.
		 *
		 * Also make sure the result is put in RES->d (where it already
		 * might be, see above).
		 */
		if (mod_shift_cnt) {
			carry_limb =
			    mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt);
			rp = res->d;
			if (carry_limb) {
				rp[rsize] = carry_limb;
				rsize++;
			}
		} else {
			MPN_COPY(res->d, rp, rsize);
			rp = res->d;
		}

		if (rsize >= msize) {
			mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
			rsize = msize;
		}

		/* Remove any leading zero words from the result.  */
		if (mod_shift_cnt)
			mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
		MPN_NORMALIZE(rp, rsize);
	}

	if (negative_result && rsize) {
		/* Undo the normalization of MOD, then return MOD - result. */
		if (mod_shift_cnt)
			mpihelp_rshift(mp, mp, msize, mod_shift_cnt);
		mpihelp_sub(rp, mp, msize, rp, rsize);
		rsize = msize;
		rsign = msign;
		MPN_NORMALIZE(rp, rsize);
	}
	res->nlimbs = rsize;
	res->sign = rsign;

leave:
	rc = 0;
enomem:
	mpihelp_release_karatsuba_ctx(&karactx);
	/*
	 * NOTE(review): on the success path rp was reset to res->d above, so
	 * with assign_rp set this passes res->d itself (not the array
	 * allocated for rp) to mpi_assign_limb_space(), after the result was
	 * copied into the old, smaller res->d.  On an error exit taken inside
	 * the main loop rp may instead equal xp_marker, which is freed just
	 * below.  Confirm both cases against mpi_assign_limb_space().
	 */
	if (assign_rp)
		mpi_assign_limb_space(res, rp, size);
	if (mp_marker)
		mpi_free_limb_space(mp_marker);
	if (bp_marker)
		mpi_free_limb_space(bp_marker);
	if (ep_marker)
		mpi_free_limb_space(ep_marker);
	if (xp_marker)
		mpi_free_limb_space(xp_marker);
	if (tspace)
		mpi_free_limb_space(tspace);
	return rc;
}
EXPORT_SYMBOL_GPL(mpi_powm);
+ * + * Copyright 1991, 1993, 1994, 1996, 1999-2002, 2004, 2012, 2013, 2015 + * Free Software Foundation, Inc. + * + * This file was based on the GNU MP Library source file: + * https://gmplib.org/repo/gmp-6.2/file/510b83519d1c/mpz/aors_ui.h + * + * The GNU MP Library is free software; you can redistribute it and/or modify + * it under the terms of either: + * + * * the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your + * option) any later version. + * + * or + * + * * the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any + * later version. + * + * or both in parallel, as here. + * + * The GNU MP Library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received copies of the GNU General Public License and the + * GNU Lesser General Public License along with the GNU MP Library. If not, + * see https://www.gnu.org/licenses/. + */ + +#include "mpi-internal.h" + +int mpi_sub_ui(MPI w, MPI u, unsigned long vval) +{ + if (u->nlimbs == 0) { + if (mpi_resize(w, 1) < 0) + return -ENOMEM; + w->d[0] = vval; + w->nlimbs = (vval != 0); + w->sign = (vval != 0); + return 0; + } + + /* If not space for W (and possible carry), increase space. */ + if (mpi_resize(w, u->nlimbs + 1)) + return -ENOMEM; + + if (u->sign) { + mpi_limb_t cy; + + cy = mpihelp_add_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval); + w->d[u->nlimbs] = cy; + w->nlimbs = u->nlimbs + cy; + w->sign = 1; + } else { + /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. 
+ */ + if (u->nlimbs == 1 && u->d[0] < vval) { + w->d[0] = vval - u->d[0]; + w->nlimbs = 1; + w->sign = 1; + } else { + mpihelp_sub_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval); + /* Size can decrease with at most one limb. */ + w->nlimbs = (u->nlimbs - (w->d[u->nlimbs - 1] == 0)); + w->sign = 0; + } + } + + mpi_normalize(w); + return 0; +} +EXPORT_SYMBOL_GPL(mpi_sub_ui); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c new file mode 100644 index 000000000..7054311d7 --- /dev/null +++ b/lib/mpi/mpicoder.c @@ -0,0 +1,752 @@ +/* mpicoder.c - Coder for the external representation of MPIs + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <linux/bitops.h> +#include <linux/count_zeros.h> +#include <linux/byteorder/generic.h> +#include <linux/scatterlist.h> +#include <linux/string.h> +#include "mpi-internal.h" + +#define MAX_EXTERN_SCAN_BYTES (16*1024*1024) +#define MAX_EXTERN_MPI_BITS 16384 + +/** + * mpi_read_raw_data - Read a raw byte stream as a positive integer + * @xbuffer: The data to read + * @nbytes: The amount of data to read + */ +MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) +{ + const uint8_t *buffer = xbuffer; + int i, j; + unsigned nbits, nlimbs; + mpi_limb_t a; + MPI val = NULL; + + while (nbytes > 0 && buffer[0] == 0) { + buffer++; + nbytes--; + } + + nbits = nbytes * 8; + if (nbits > MAX_EXTERN_MPI_BITS) { + pr_info("MPI: mpi too large (%u bits)\n", nbits); + return NULL; + } + if (nbytes > 0) + nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8); + + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); + val = mpi_alloc(nlimbs); + if (!val) + return NULL; + val->nbits = nbits; + val->sign = 0; + val->nlimbs = nlimbs; + + if (nbytes > 0) { + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + for (j = nlimbs; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + a <<= 8; + a |= *buffer++; + } + i = 0; + val->d[j - 1] = a; + } + } + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_raw_data); + +MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) +{ + const uint8_t *buffer = xbuffer; + unsigned int nbits, nbytes; + MPI val; + + if (*ret_nread < 2) + return ERR_PTR(-EINVAL); + nbits = buffer[0] << 8 | buffer[1]; + + if (nbits > MAX_EXTERN_MPI_BITS) { + pr_info("MPI: mpi too large (%u bits)\n", nbits); + return ERR_PTR(-EINVAL); + } + + nbytes = DIV_ROUND_UP(nbits, 8); + if (nbytes + 2 > 
*ret_nread) { + pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n", + nbytes, *ret_nread); + return ERR_PTR(-EINVAL); + } + + val = mpi_read_raw_data(buffer + 2, nbytes); + if (!val) + return ERR_PTR(-ENOMEM); + + *ret_nread = nbytes + 2; + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_from_buffer); + +/**************** + * Fill the mpi VAL from the hex string in STR. + */ +int mpi_fromstr(MPI val, const char *str) +{ + int sign = 0; + int prepend_zero = 0; + int i, j, c, c1, c2; + unsigned int nbits, nbytes, nlimbs; + mpi_limb_t a; + + if (*str == '-') { + sign = 1; + str++; + } + + /* Skip optional hex prefix. */ + if (*str == '0' && str[1] == 'x') + str += 2; + + nbits = strlen(str); + if (nbits > MAX_EXTERN_SCAN_BYTES) { + mpi_clear(val); + return -EINVAL; + } + nbits *= 4; + if ((nbits % 8)) + prepend_zero = 1; + + nbytes = (nbits+7) / 8; + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + + if (val->alloced < nlimbs) + mpi_resize(val, nlimbs); + + i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB); + i %= BYTES_PER_MPI_LIMB; + j = val->nlimbs = nlimbs; + val->sign = sign; + for (; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + if (prepend_zero) { + c1 = '0'; + prepend_zero = 0; + } else + c1 = *str++; + + if (!c1) { + mpi_clear(val); + return -EINVAL; + } + c2 = *str++; + if (!c2) { + mpi_clear(val); + return -EINVAL; + } + if (c1 >= '0' && c1 <= '9') + c = c1 - '0'; + else if (c1 >= 'a' && c1 <= 'f') + c = c1 - 'a' + 10; + else if (c1 >= 'A' && c1 <= 'F') + c = c1 - 'A' + 10; + else { + mpi_clear(val); + return -EINVAL; + } + c <<= 4; + if (c2 >= '0' && c2 <= '9') + c |= c2 - '0'; + else if (c2 >= 'a' && c2 <= 'f') + c |= c2 - 'a' + 10; + else if (c2 >= 'A' && c2 <= 'F') + c |= c2 - 'A' + 10; + else { + mpi_clear(val); + return -EINVAL; + } + a <<= 8; + a |= c; + } + i = 0; + val->d[j-1] = a; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mpi_fromstr); + +MPI mpi_scanval(const char *string) +{ + MPI a; + + a = 
mpi_alloc(0); + if (!a) + return NULL; + + if (mpi_fromstr(a, string)) { + mpi_free(a); + return NULL; + } + mpi_normalize(a); + return a; +} +EXPORT_SYMBOL_GPL(mpi_scanval); + +static int count_lzeros(MPI a) +{ + mpi_limb_t alimb; + int i, lzeros = 0; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; + if (alimb == 0) { + lzeros += sizeof(mpi_limb_t); + } else { + lzeros += count_leading_zeros(alimb) / 8; + break; + } + } + return lzeros; +} + +/** + * mpi_read_buffer() - read MPI to a bufer provided by user (msb first) + * + * @a: a multi precision integer + * @buf: bufer to which the output will be written to. Needs to be at + * leaset mpi_get_size(a) long. + * @buf_len: size of the buf. + * @nbytes: receives the actual length of the data written on success and + * the data to-be-written on -EOVERFLOW in case buf_len was too + * small. + * @sign: if not NULL, it will be set to the sign of a. + * + * Return: 0 on success or error code in case of error + */ +int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, + int *sign) +{ + uint8_t *p; +#if BYTES_PER_MPI_LIMB == 4 + __be32 alimb; +#elif BYTES_PER_MPI_LIMB == 8 + __be64 alimb; +#else +#error please implement for this limb size. +#endif + unsigned int n = mpi_get_size(a); + int i, lzeros; + + if (!buf || !nbytes) + return -EINVAL; + + if (sign) + *sign = a->sign; + + lzeros = count_lzeros(a); + + if (buf_len < n - lzeros) { + *nbytes = n - lzeros; + return -EOVERFLOW; + } + + p = buf; + *nbytes = n - lzeros; + + for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB, + lzeros %= BYTES_PER_MPI_LIMB; + i >= 0; i--) { +#if BYTES_PER_MPI_LIMB == 4 + alimb = cpu_to_be32(a->d[i]); +#elif BYTES_PER_MPI_LIMB == 8 + alimb = cpu_to_be64(a->d[i]); +#else +#error please implement for this limb size. 
/**
 * mpi_write_to_sgl() - Function exports MPI to an sgl (msb first)
 *
 * This function works in the same way as the mpi_read_buffer, but it
 * takes an sgl instead of u8 * buf.
 *
 * @a:		a multi precision integer
 * @sgl:	scatterlist to write to. Needs to be at least
 *		mpi_get_size(a) long.
 * @nbytes:	the number of bytes to write.  Leading bytes will be
 *		filled with zero.
 * @sign:	if not NULL, it will be set to the sign of a.
 *
 * Return:	0 on success, -EOVERFLOW if @nbytes is smaller than
 *		mpi_get_size(a), -EINVAL if sg_nents_for_len() reports
 *		an error for @sgl.
 */
int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes,
		     int *sign)
{
	u8 *p, *p2;	/* p: read cursor in alimb, p2: write cursor in the sgl */
#if BYTES_PER_MPI_LIMB == 4
	__be32 alimb;
#elif BYTES_PER_MPI_LIMB == 8
	__be64 alimb;
#else
#error please implement for this limb size.
#endif
	unsigned int n = mpi_get_size(a);
	struct sg_mapping_iter miter;
	int i, x, buf_len;	/* buf_len: bytes left in the current mapping */
	int nents;

	/* The sign is reported even when the function fails below. */
	if (sign)
		*sign = a->sign;

	if (nbytes < n)
		return -EOVERFLOW;

	nents = sg_nents_for_len(sgl, nbytes);
	if (nents < 0)
		return -EINVAL;

	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG);
	/* NOTE(review): the result of sg_miter_next() is not checked here. */
	sg_miter_next(&miter);
	buf_len = miter.length;
	p2 = miter.addr;

	/* Zero-fill the leading (nbytes - n) bytes, moving across entries. */
	while (nbytes > n) {
		i = min_t(unsigned, nbytes - n, buf_len);
		memset(p2, 0, i);
		p2 += i;
		nbytes -= i;

		buf_len -= i;
		if (!buf_len) {
			sg_miter_next(&miter);
			buf_len = miter.length;
			p2 = miter.addr;
		}
	}

	/* Emit the limbs most significant first, each in big-endian order. */
	for (i = a->nlimbs - 1; i >= 0; i--) {
#if BYTES_PER_MPI_LIMB == 4
		alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0;
#elif BYTES_PER_MPI_LIMB == 8
		alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0;
#else
#error please implement for this limb size.
#endif
		p = (u8 *)&alimb;

		/* Copy byte by byte, advancing the iterator when a mapping is full. */
		for (x = 0; x < sizeof(alimb); x++) {
			*p2++ = *p++;
			if (!--buf_len) {
				sg_miter_next(&miter);
				buf_len = miter.length;
				p2 = miter.addr;
			}
		}
	}

	sg_miter_stop(&miter);
	return 0;
}
EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
+ * + * @sgl: scatterlist to read from + * @nbytes: number of bytes to read + * + * Return: Pointer to a new MPI or NULL on error + */ +MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) +{ + struct sg_mapping_iter miter; + unsigned int nbits, nlimbs; + int x, j, z, lzeros, ents; + unsigned int len; + const u8 *buff; + mpi_limb_t a; + MPI val = NULL; + + ents = sg_nents_for_len(sgl, nbytes); + if (ents < 0) + return NULL; + + sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); + + lzeros = 0; + len = 0; + while (nbytes > 0) { + while (len && !*buff) { + lzeros++; + len--; + buff++; + } + + if (len && *buff) + break; + + sg_miter_next(&miter); + buff = miter.addr; + len = miter.length; + + nbytes -= lzeros; + lzeros = 0; + } + + miter.consumed = lzeros; + + nbytes -= lzeros; + nbits = nbytes * 8; + if (nbits > MAX_EXTERN_MPI_BITS) { + sg_miter_stop(&miter); + pr_info("MPI: mpi too large (%u bits)\n", nbits); + return NULL; + } + + if (nbytes > 0) + nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8); + + sg_miter_stop(&miter); + + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); + val = mpi_alloc(nlimbs); + if (!val) + return NULL; + + val->nbits = nbits; + val->sign = 0; + val->nlimbs = nlimbs; + + if (nbytes == 0) + return val; + + j = nlimbs - 1; + a = 0; + z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + z %= BYTES_PER_MPI_LIMB; + + while (sg_miter_next(&miter)) { + buff = miter.addr; + len = min_t(unsigned, miter.length, nbytes); + nbytes -= len; + + for (x = 0; x < len; x++) { + a <<= 8; + a |= *buff++; + if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { + val->d[j--] = a; + a = 0; + } + } + z += x; + } + + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); + +/* Perform a two's complement operation on buffer P of size N bytes. 
/*
 * Negate, in place, the two's complement number stored most significant
 * byte first in the N bytes at P.
 */
static void twocompl(unsigned char *p, unsigned int n)
{
	int pos = n - 1;

	/* Zero bytes at the least significant end are unchanged by negation. */
	while (pos >= 0 && !p[pos])
		pos--;

	/* Empty or all-zero buffer: nothing to do. */
	if (pos < 0)
		return;

	/*
	 * Lowest non-zero byte: its lowest set bit and the zeros below it are
	 * kept and every higher bit is flipped, which is exactly the byte-wide
	 * arithmetic negation.
	 */
	p[pos] = (unsigned char)(0x100 - p[pos]);

	/* All more significant bytes are simply inverted. */
	while (--pos >= 0)
		p[pos] ^= 0xff;
}
*/ + kfree(tmp); + return -E2BIG; + } + if (buffer) { + unsigned char *s = buffer; + + if (extra == 1) + *s++ = 0; + else if (extra) + *s++ = 0xff; + memcpy(s, tmp, n-!!extra); + } + kfree(tmp); + *nwritten = n; + return 0; + } else if (format == GCRYMPI_FMT_USG) { + unsigned int n = (nbits + 7)/8; + + /* Note: We ignore the sign for this format. */ + /* FIXME: for performance reasons we should put this into + * mpi_aprint because we can then use the buffer directly. + */ + + if (buffer && n > len) + return -E2BIG; + if (buffer) { + unsigned char *tmp; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + memcpy(buffer, tmp, n); + kfree(tmp); + } + *nwritten = n; + return 0; + } else if (format == GCRYMPI_FMT_PGP) { + unsigned int n = (nbits + 7)/8; + + /* The PGP format can only handle unsigned integers. */ + if (negative) + return -EINVAL; + + if (buffer && n+2 > len) + return -E2BIG; + + if (buffer) { + unsigned char *tmp; + unsigned char *s = buffer; + + s[0] = nbits >> 8; + s[1] = nbits; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + memcpy(s+2, tmp, n); + kfree(tmp); + } + *nwritten = n+2; + return 0; + } else if (format == GCRYMPI_FMT_SSH) { + unsigned char *tmp; + int extra = 0; + unsigned int n; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + + if (negative) { + twocompl(tmp, n); + if (!(*tmp & 0x80)) { + /* Need to extend the sign. 
*/ + n++; + extra = 2; + } + } else if (n && (*tmp & 0x80)) { + n++; + extra = 1; + } + + if (buffer && n+4 > len) { + kfree(tmp); + return -E2BIG; + } + + if (buffer) { + unsigned char *s = buffer; + + *s++ = n >> 24; + *s++ = n >> 16; + *s++ = n >> 8; + *s++ = n; + if (extra == 1) + *s++ = 0; + else if (extra) + *s++ = 0xff; + memcpy(s, tmp, n-!!extra); + } + kfree(tmp); + *nwritten = 4+n; + return 0; + } else if (format == GCRYMPI_FMT_HEX) { + unsigned char *tmp; + int i; + int extra = 0; + unsigned int n = 0; + + tmp = mpi_get_buffer(a, &n, NULL); + if (!tmp) + return -EINVAL; + if (!n || (*tmp & 0x80)) + extra = 2; + + if (buffer && 2*n + extra + negative + 1 > len) { + kfree(tmp); + return -E2BIG; + } + if (buffer) { + unsigned char *s = buffer; + + if (negative) + *s++ = '-'; + if (extra) { + *s++ = '0'; + *s++ = '0'; + } + + for (i = 0; i < n; i++) { + unsigned int c = tmp[i]; + + *s++ = (c >> 4) < 10 ? '0'+(c>>4) : 'A'+(c>>4)-10; + c &= 15; + *s++ = c < 10 ? '0'+c : 'A'+c-10; + } + *s++ = 0; + *nwritten = s - buffer; + } else { + *nwritten = 2*n + extra + negative + 1; + } + kfree(tmp); + return 0; + } else + return -EINVAL; +} +EXPORT_SYMBOL_GPL(mpi_print); diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c new file mode 100644 index 000000000..f23709114 --- /dev/null +++ b/lib/mpi/mpih-cmp.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* mpihelp-sub.c - MPI helper functions + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/**************** + * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + * There are no restrictions on the relative sizes of + * the two arguments. + * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. + */ +int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t op1_word, op2_word; + + for (i = size - 1; i >= 0; i--) { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if (op1_word != op2_word) + goto diff; + } + return 0; + +diff: + /* This can *not* be simplified to + * op2_word - op2_word + * since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c new file mode 100644 index 000000000..be70ee2e4 --- /dev/null +++ b/lib/mpi/mpih-div.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* mpihelp-div.c - MPI helper functions + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
/*
 * mpihelp_mod_1() - remainder of (DIVIDEND_PTR, DIVIDEND_SIZE) divided by
 * the single limb DIVISOR_LIMB.
 *
 * Returns the remainder; a zero-length dividend yields 0.  Limbs are
 * consumed from the most significant (index DIVIDEND_SIZE - 1) down to
 * index 0.  Which of the two strategies below is used depends only on the
 * compile-time UDIV_TIME/UMUL_TIME cost constants and on DIVIDEND_SIZE.
 */
mpi_limb_t
mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
			mpi_limb_t divisor_limb)
{
	mpi_size_t i;
	mpi_limb_t n1, n0, r;
	mpi_limb_t dummy __maybe_unused;	/* receives the unused quotient limbs */

	/* Botch: Should this be handled at all?  Rely on callers?	*/
	if (!dividend_size)
		return 0;

	/* If multiplication is much faster than division, and the
	 * dividend is large, pre-invert the divisor, and use
	 * only multiplications in the inner loop.
	 *
	 * This test should be read:
	 *	 Does it ever help to use udiv_qrnnd_preinv?
	 *	   && Does what we save compensate for the inversion overhead?
	 */
	if (UDIV_TIME > (2 * UMUL_TIME + 6)
			&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
		int normalization_steps;

		normalization_steps = count_leading_zeros(divisor_limb);
		if (normalization_steps) {
			mpi_limb_t divisor_limb_inverted;

			/* Shift the divisor so that its top bit is set. */
			divisor_limb <<= normalization_steps;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 *
			 * Special case for DIVISOR_LIMB == 100...000.
			 */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t)0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
						-divisor_limb, 0, divisor_limb);

			/* The bits shifted out of the top limb seed the remainder. */
			n1 = dividend_ptr[dividend_size - 1];
			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

			/* Possible optimization:
			 * if (r == 0
			 * && divisor_limb > ((n1 << normalization_steps)
			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
			 * ...one division less...
			 */
			/* Each step divides r:(n1:n0 shifted left by the same amount). */
			for (i = dividend_size - 2; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(dummy, r, r,
						((n1 << normalization_steps)
						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
						divisor_limb, divisor_limb_inverted);
				n1 = n0;
			}
			UDIV_QRNND_PREINV(dummy, r, r,
					n1 << normalization_steps,
					divisor_limb, divisor_limb_inverted);
			/* Undo the normalization shift on the remainder. */
			return r >> normalization_steps;
		} else {
			mpi_limb_t divisor_limb_inverted;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 *
			 * Special case for DIVISOR_LIMB == 100...000.
			 */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t)0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
						-divisor_limb, 0, divisor_limb);

			i = dividend_size - 1;
			r = dividend_ptr[i];

			/*
			 * If the top limb is already below the divisor it serves
			 * as the initial remainder and is skipped; otherwise start
			 * from r = 0 and divide it like every other limb.
			 */
			if (r >= divisor_limb)
				r = 0;
			else
				i--;

			for ( ; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(dummy, r, r,
						n0, divisor_limb, divisor_limb_inverted);
			}
			return r;
		}
	} else {
		if (UDIV_NEEDS_NORMALIZATION) {
			int normalization_steps;

			normalization_steps = count_leading_zeros(divisor_limb);
			if (normalization_steps) {
				/* Same scheme as above, using udiv_qrnnd directly. */
				divisor_limb <<= normalization_steps;

				n1 = dividend_ptr[dividend_size - 1];
				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

				/* Possible optimization:
				 * if (r == 0
				 * && divisor_limb > ((n1 << normalization_steps)
				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
				 * ...one division less...
				 */
				for (i = dividend_size - 2; i >= 0; i--) {
					n0 = dividend_ptr[i];
					udiv_qrnnd(dummy, r, r,
						((n1 << normalization_steps)
						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
						divisor_limb);
					n1 = n0;
				}
				udiv_qrnnd(dummy, r, r,
						n1 << normalization_steps,
						divisor_limb);
				return r >> normalization_steps;
			}
		}
		/* No normalization needed, either because udiv_qrnnd doesn't require
		 * it, or because DIVISOR_LIMB is already normalized.
		 */
		i = dividend_size - 1;
		r = dividend_ptr[i];

		/* Same choice of starting remainder as in the pre-inverted path. */
		if (r >= divisor_limb)
			r = 0;
		else
			i--;

		for (; i >= 0; i--) {
			n0 = dividend_ptr[i];
			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
		}
		return r;
	}
}
*/ + /* + * existing clients of this function have been modified + * not to call it with dsize == 0, so this should not happen + */ + return 1 / dsize; + + case 1: + { + mpi_size_t i; + mpi_limb_t n1; + mpi_limb_t d; + + d = dp[0]; + n1 = np[nsize - 1]; + + if (n1 >= d) { + n1 -= d; + most_significant_q_limb = 1; + } + + qp += qextra_limbs; + for (i = nsize - 2; i >= 0; i--) + udiv_qrnnd(qp[i], n1, n1, np[i], d); + qp -= qextra_limbs; + + for (i = qextra_limbs - 1; i >= 0; i--) + udiv_qrnnd(qp[i], n1, n1, 0, d); + + np[0] = n1; + } + break; + + case 2: + { + mpi_size_t i; + mpi_limb_t n1, n0, n2; + mpi_limb_t d1, d0; + + np += nsize - 2; + d1 = dp[1]; + d0 = dp[0]; + n1 = np[1]; + n0 = np[0]; + + if (n1 >= d1 && (n1 > d1 || n0 >= d0)) { + sub_ddmmss(n1, n0, n1, n0, d1, d0); + most_significant_q_limb = 1; + } + + for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) { + mpi_limb_t q; + mpi_limb_t r; + + if (i >= qextra_limbs) + np--; + else + np[0] = 0; + + if (n1 == d1) { + /* Q should be either 111..111 or 111..110. Need special + * treatment of this rare case as normal division would + * give overflow. */ + q = ~(mpi_limb_t) 0; + + r = n0 + d1; + if (r < d1) { /* Carry in the addition? */ + add_ssaaaa(n1, n0, r - d0, + np[0], 0, d0); + qp[i] = q; + continue; + } + n1 = d0 - (d0 != 0 ? 1 : 0); + n0 = -d0; + } else { + udiv_qrnnd(q, r, n1, n0, d1); + umul_ppmm(n1, n0, d0, q); + } + + n2 = np[0]; +q_test: + if (n1 > r || (n1 == r && n0 > n2)) { + /* The estimated Q was too large. */ + q--; + sub_ddmmss(n1, n0, n1, n0, 0, d0); + r += d1; + if (r >= d1) /* If not carry, test Q again. 
*/ + goto q_test; + } + + qp[i] = q; + sub_ddmmss(n1, n0, r, n2, n1, n0); + } + np[1] = n1; + np[0] = n0; + } + break; + + default: + { + mpi_size_t i; + mpi_limb_t dX, d1, n0; + + np += nsize - dsize; + dX = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dX) { + if (n0 > dX + || mpihelp_cmp(np, dp, dsize - 1) >= 0) { + mpihelp_sub_n(np, np, dp, dsize); + n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { + mpi_limb_t q; + mpi_limb_t n1, n2; + mpi_limb_t cy_limb; + + if (i >= qextra_limbs) { + np--; + n2 = np[dsize]; + } else { + n2 = np[dsize - 1]; + MPN_COPY_DECR(np + 1, np, dsize - 1); + np[0] = 0; + } + + if (n0 == dX) { + /* This might over-estimate q, but it's probably not worth + * the extra code here to find out. */ + q = ~(mpi_limb_t) 0; + } else { + mpi_limb_t r; + + udiv_qrnnd(q, r, n0, np[dsize - 1], dX); + umul_ppmm(n1, n0, d1, q); + + while (n1 > r + || (n1 == r + && n0 > np[dsize - 2])) { + q--; + r += dX; + if (r < dX) /* I.e. "carry in previous addition?" */ + break; + n1 -= n0 < d1; + n0 -= d1; + } + } + + /* Possible optimization: We already have (q * n0) and (1 * n1) + * after the calculation of q. Taking advantage of that, we + * could make this loop make two iterations less. */ + cy_limb = mpihelp_submul_1(np, dp, dsize, q); + + if (n2 != cy_limb) { + mpihelp_add_n(np, np, dp, dsize); + q--; + } + + qp[i] = q; + n0 = np[dsize - 1]; + } + } + } + + return most_significant_q_limb; +} + +/**************** + * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + * Return the single-limb remainder. + * There are no constraints on the value of the divisor. + * + * QUOT_PTR and DIVIDEND_PTR might point to the same limb. 
+ */ + +mpi_limb_t +mpihelp_divmod_1(mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb) +{ + mpi_size_t i; + mpi_limb_t n1, n0, r; + mpi_limb_t dummy __maybe_unused; + + if (!dividend_size) + return 0; + + /* If multiplication is much faster than division, and the + * dividend is large, pre-invert the divisor, and use + * only multiplications in the inner loop. + * + * This test should be read: + * Does it ever help to use udiv_qrnnd_preinv? + * && Does what we save compensate for the inversion overhead? + */ + if (UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) { + int normalization_steps; + + normalization_steps = count_leading_zeros(divisor_limb); + if (normalization_steps) { + mpi_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + */ + /* Special case for DIVISOR_LIMB == 100...000. */ + if (!(divisor_limb << 1)) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... 
+ */ + for (i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + UDIV_QRNND_PREINV(quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } else { + mpi_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + */ + /* Special case for DIVISOR_LIMB == 100...000. */ + if (!(divisor_limb << 1)) + divisor_limb_inverted = ~(mpi_limb_t) 0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + quot_ptr[i--] = 0; + + for ( ; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(quot_ptr[i], r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } else { + if (UDIV_NEEDS_NORMALIZATION) { + int normalization_steps; + + normalization_steps = count_leading_zeros(divisor_limb); + if (normalization_steps) { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... 
+ */ + for (i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd(quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd(quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + * it, or because DIVISOR_LIMB is already normalized. + */ + i = dividend_size - 1; + r = dividend_ptr[i]; + + if (r >= divisor_limb) + r = 0; + else + quot_ptr[i--] = 0; + + for (; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb); + } + return r; + } +} diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c new file mode 100644 index 000000000..e5f1c84e3 --- /dev/null +++ b/lib/mpi/mpih-mul.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* mpihelp-mul.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + * 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
+ */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + mul_n_basecase(prodp, up, vp, size); \ + else \ + mul_n(prodp, up, vp, size, tspace); \ + } while (0); + +#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + mpih_sqr_n_basecase(prodp, up, size); \ + else \ + mpih_sqr_n(prodp, up, size, tspace); \ + } while (0); + +/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), + * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are + * always stored. Return the most significant limb. + * + * Argument constraints: + * 1. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. + * + * + * Handle simple cases with traditional multiplication. + * + * This is the most critical code of multiplication. All multiplies rely + * on this, both small and huge. Small ones arrive here immediately. Huge + * ones arrive here as this is the base case for Karatsuba's recursive + * algorithm below. + */ + +static mpi_limb_t +mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t cy; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) { + if (v_limb == 1) + MPN_COPY(prodp, up, size); + else + MPN_ZERO(prodp, size); + cy = 0; + } else + cy = mpihelp_mul_1(prodp, up, size, v_limb); + + prodp[size] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. 
*/ + for (i = 1; i < size; i++) { + v_limb = vp[i]; + if (v_limb <= 1) { + cy = 0; + if (v_limb == 1) + cy = mpihelp_add_n(prodp, prodp, up, size); + } else + cy = mpihelp_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy; + prodp++; + } + + return cy; +} + +static void +mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t size, mpi_ptr_t tspace) +{ + if (size & 1) { + /* The size is odd, and the code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); + cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); + prodp[esize + size] = cy_limb; + } else { + /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. + * + * Split U in two pieces, U1 and U0, such that + * U = U0 + U1*(B**n), + * and V in V1 and V0, such that + * V = V0 + V1*(B**n). + * + * UV is then computed recursively using the identity + * + * 2n n n n + * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V + * 1 1 1 0 0 1 0 0 + * + * Where B = 2**BITS_PER_MP_LIMB. + */ + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + int negflg; + + /* Product H. ________________ ________________ + * |_____U1 x V1____||____U0 x V0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, + tspace); + + /* Product M. 
________________ + * |_(U1-U0)(V0-V1)_| + */ + if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { + mpihelp_sub_n(prodp, up + hsize, up, hsize); + negflg = 0; + } else { + mpihelp_sub_n(prodp, up, up + hsize, hsize); + negflg = 1; + } + if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { + mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); + negflg ^= 1; + } else { + mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); + /* No change of NEGFLG. */ + } + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, + tspace + size); + + /* Add/copy product H. */ + MPN_COPY(prodp + hsize, prodp + size, hsize); + cy = mpihelp_add_n(prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number) */ + if (negflg) + cy -= + mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, + size); + else + cy += + mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, + size); + + /* Product L. ________________ ________________ + * |________________||____U0 x V0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); + + /* Add/copy Product L (twice) */ + + cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpihelp_add_1(prodp + hsize + size, + prodp + hsize + size, hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, + hsize); + if (cy) + mpihelp_add_1(prodp + size, prodp + size, size, 1); + } +} + +void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t cy_limb; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. 
*/ + v_limb = up[0]; + if (v_limb <= 1) { + if (v_limb == 1) + MPN_COPY(prodp, up, size); + else + MPN_ZERO(prodp, size); + cy_limb = 0; + } else + cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for (i = 1; i < size; i++) { + v_limb = up[i]; + if (v_limb <= 1) { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpihelp_add_n(prodp, prodp, up, size); + } else + cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + +void +mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) +{ + if (size & 1) { + /* The size is odd, and the code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_SQR_N_RECURSE(prodp, up, esize, tspace); + cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); + + prodp[esize + size] = cy_limb; + } else { + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + + /* Product H. ________________ ________________ + * |_____U1 x U1____||____U0 x U0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); + + /* Product M. 
________________ + * |_(U1-U0)(U0-U1)_| + */ + if (mpihelp_cmp(up + hsize, up, hsize) >= 0) + mpihelp_sub_n(prodp, up + hsize, up, hsize); + else + mpihelp_sub_n(prodp, up, up + hsize, hsize); + + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); + + /* Add/copy product H */ + MPN_COPY(prodp + hsize, prodp + size, hsize); + cy = mpihelp_add_n(prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number). */ + cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); + + /* Product L. ________________ ________________ + * |________________||____U0 x U0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); + + /* Add/copy Product L (twice). */ + cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpihelp_add_1(prodp + hsize + size, + prodp + hsize + size, hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, + hsize); + if (cy) + mpihelp_add_1(prodp + size, prodp + size, size, 1); + } +} + + +void mpihelp_mul_n(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ + if (up == vp) { + if (size < KARATSUBA_THRESHOLD) + mpih_sqr_n_basecase(prodp, up, size); + else { + mpi_ptr_t tspace; + tspace = mpi_alloc_limb_space(2 * size); + mpih_sqr_n(prodp, up, size, tspace); + mpi_free_limb_space(tspace); + } + } else { + if (size < KARATSUBA_THRESHOLD) + mul_n_basecase(prodp, up, vp, size); + else { + mpi_ptr_t tspace; + tspace = mpi_alloc_limb_space(2 * size); + mul_n(prodp, up, vp, size, tspace); + mpi_free_limb_space(tspace); + } + } +} + +int +mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t 
vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx) +{ + mpi_limb_t cy; + + if (!ctx->tspace || ctx->tspace_size < vsize) { + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + ctx->tspace = mpi_alloc_limb_space(2 * vsize); + if (!ctx->tspace) + return -ENOMEM; + ctx->tspace_size = vsize; + } + + MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); + + prodp += vsize; + up += vsize; + usize -= vsize; + if (usize >= vsize) { + if (!ctx->tp || ctx->tp_size < vsize) { + if (ctx->tp) + mpi_free_limb_space(ctx->tp); + ctx->tp = mpi_alloc_limb_space(2 * vsize); + if (!ctx->tp) { + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + ctx->tspace = NULL; + return -ENOMEM; + } + ctx->tp_size = vsize; + } + + do { + MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); + cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); + mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, + cy); + prodp += vsize; + up += vsize; + usize -= vsize; + } while (usize >= vsize); + } + + if (usize) { + if (usize < KARATSUBA_THRESHOLD) { + mpi_limb_t tmp; + if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) + < 0) + return -ENOMEM; + } else { + if (!ctx->next) { + ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx->next) + return -ENOMEM; + } + if (mpihelp_mul_karatsuba_case(ctx->tspace, + vp, vsize, + up, usize, + ctx->next) < 0) + return -ENOMEM; + } + + cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); + mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); + } + + return 0; +} + +void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) +{ + struct karatsuba_ctx *ctx2; + + if (ctx->tp) + mpi_free_limb_space(ctx->tp); + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + for (ctx = ctx->next; ctx; ctx = ctx2) { + ctx2 = ctx->next; + if (ctx->tp) + mpi_free_limb_space(ctx->tp); + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + kfree(ctx); + } +} + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + * and v (pointed to by VP, 
with VSIZE limbs), and store the result at + * PRODP. USIZE + VSIZE limbs are always stored, but if the input + * operands are normalized. Return the most significant limb of the + * result. + * + * NOTE: The space pointed to by PRODP is overwritten before finished + * with U and V, so overlap is an error. + * + * Argument constraints: + * 1. USIZE >= VSIZE. + * 2. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. + */ + +int +mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) +{ + mpi_ptr_t prod_endp = prodp + usize + vsize - 1; + mpi_limb_t cy; + struct karatsuba_ctx ctx; + + if (vsize < KARATSUBA_THRESHOLD) { + mpi_size_t i; + mpi_limb_t v_limb; + + if (!vsize) { + *_result = 0; + return 0; + } + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) { + if (v_limb == 1) + MPN_COPY(prodp, up, usize); + else + MPN_ZERO(prodp, usize); + cy = 0; + } else + cy = mpihelp_mul_1(prodp, up, usize, v_limb); + + prodp[usize] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. 
*/ + for (i = 1; i < vsize; i++) { + v_limb = vp[i]; + if (v_limb <= 1) { + cy = 0; + if (v_limb == 1) + cy = mpihelp_add_n(prodp, prodp, up, + usize); + } else + cy = mpihelp_addmul_1(prodp, up, usize, v_limb); + + prodp[usize] = cy; + prodp++; + } + + *_result = cy; + return 0; + } + + memset(&ctx, 0, sizeof ctx); + if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) + return -ENOMEM; + mpihelp_release_karatsuba_ctx(&ctx); + *_result = *prod_endp; + return 0; +} diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c new file mode 100644 index 000000000..e6c4b3180 --- /dev/null +++ b/lib/mpi/mpiutil.c @@ -0,0 +1,330 @@ +/* mpiutil.ac - Utility functions for MPI + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +/* Constants allocated right away at startup. */ +static MPI constants[MPI_NUMBER_OF_CONSTANTS]; + +/* Initialize the MPI subsystem. This is called early and allows to + * do some initialization without taking care of threading issues. 
+ */ +static int __init mpi_init(void) +{ + int idx; + unsigned long value; + + for (idx = 0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) { + switch (idx) { + case MPI_C_ZERO: + value = 0; + break; + case MPI_C_ONE: + value = 1; + break; + case MPI_C_TWO: + value = 2; + break; + case MPI_C_THREE: + value = 3; + break; + case MPI_C_FOUR: + value = 4; + break; + case MPI_C_EIGHT: + value = 8; + break; + default: + pr_err("MPI: invalid mpi_const selector %d\n", idx); + return -EFAULT; + } + constants[idx] = mpi_alloc_set_ui(value); + constants[idx]->flags = (16|32); + } + + return 0; +} +postcore_initcall(mpi_init); + +/* Return a constant MPI descripbed by NO which is one of the + * MPI_C_xxx macros. There is no need to copy this returned value; it + * may be used directly. + */ +MPI mpi_const(enum gcry_mpi_constants no) +{ + if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS) + pr_err("MPI: invalid mpi_const selector %d\n", no); + if (!constants[no]) + pr_err("MPI: MPI subsystem not initialized\n"); + return constants[no]; +} +EXPORT_SYMBOL_GPL(mpi_const); + +/**************** + * Note: It was a bad idea to use the number of limbs to allocate + * because on a alpha the limbs are large but we normally need + * integers of n bits - So we should chnage this to bits (or bytes). 
+ * + * But mpi_alloc is used in a lot of places :-) + */ +MPI mpi_alloc(unsigned nlimbs) +{ + MPI a; + + a = kmalloc(sizeof *a, GFP_KERNEL); + if (!a) + return a; + + if (nlimbs) { + a->d = mpi_alloc_limb_space(nlimbs); + if (!a->d) { + kfree(a); + return NULL; + } + } else { + a->d = NULL; + } + + a->alloced = nlimbs; + a->nlimbs = 0; + a->sign = 0; + a->flags = 0; + a->nbits = 0; + return a; +} +EXPORT_SYMBOL_GPL(mpi_alloc); + +mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs) +{ + size_t len = nlimbs * sizeof(mpi_limb_t); + + if (!len) + return NULL; + + return kmalloc(len, GFP_KERNEL); +} + +void mpi_free_limb_space(mpi_ptr_t a) +{ + if (!a) + return; + + kfree_sensitive(a); +} + +void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) +{ + mpi_free_limb_space(a->d); + a->d = ap; + a->alloced = nlimbs; +} + +/**************** + * Resize the array of A to NLIMBS. the additional space is cleared + * (set to 0) [done by m_realloc()] + */ +int mpi_resize(MPI a, unsigned nlimbs) +{ + void *p; + + if (nlimbs <= a->alloced) + return 0; /* no need to do it */ + + if (a->d) { + p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); + if (!p) + return -ENOMEM; + memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); + kfree_sensitive(a->d); + a->d = p; + } else { + a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); + if (!a->d) + return -ENOMEM; + } + a->alloced = nlimbs; + return 0; +} + +void mpi_clear(MPI a) +{ + if (!a) + return; + a->nlimbs = 0; + a->flags = 0; +} +EXPORT_SYMBOL_GPL(mpi_clear); + +void mpi_free(MPI a) +{ + if (!a) + return; + + if (a->flags & 4) + kfree_sensitive(a->d); + else + mpi_free_limb_space(a->d); + + if (a->flags & ~7) + pr_info("invalid flag value in mpi\n"); + kfree(a); +} +EXPORT_SYMBOL_GPL(mpi_free); + +/**************** + * Note: This copy function should not interpret the MPI + * but copy it transparently. 
+ */ +MPI mpi_copy(MPI a) +{ + int i; + MPI b; + + if (a) { + b = mpi_alloc(a->nlimbs); + b->nlimbs = a->nlimbs; + b->sign = a->sign; + b->flags = a->flags; + b->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + for (i = 0; i < b->nlimbs; i++) + b->d[i] = a->d[i]; + } else + b = NULL; + return b; +} + +/**************** + * This function allocates an MPI which is optimized to hold + * a value as large as the one given in the argument and allocates it + * with the same flags as A. + */ +MPI mpi_alloc_like(MPI a) +{ + MPI b; + + if (a) { + b = mpi_alloc(a->nlimbs); + b->nlimbs = 0; + b->sign = 0; + b->flags = a->flags; + } else + b = NULL; + + return b; +} + + +/* Set U into W and release U. If W is NULL only U will be released. */ +void mpi_snatch(MPI w, MPI u) +{ + if (w) { + mpi_assign_limb_space(w, u->d, u->alloced); + w->nlimbs = u->nlimbs; + w->sign = u->sign; + w->flags = u->flags; + u->alloced = 0; + u->nlimbs = 0; + u->d = NULL; + } + mpi_free(u); +} + + +MPI mpi_set(MPI w, MPI u) +{ + mpi_ptr_t wp, up; + mpi_size_t usize = u->nlimbs; + int usign = u->sign; + + if (!w) + w = mpi_alloc(mpi_get_nlimbs(u)); + RESIZE_IF_NEEDED(w, usize); + wp = w->d; + up = u->d; + MPN_COPY(wp, up, usize); + w->nlimbs = usize; + w->flags = u->flags; + w->flags &= ~(16|32); /* Reset the immutable and constant flags. */ + w->sign = usign; + return w; +} +EXPORT_SYMBOL_GPL(mpi_set); + +MPI mpi_set_ui(MPI w, unsigned long u) +{ + if (!w) + w = mpi_alloc(1); + /* FIXME: If U is 0 we have no need to resize and thus possible + * allocating the the limbs. + */ + RESIZE_IF_NEEDED(w, 1); + w->d[0] = u; + w->nlimbs = u ? 1 : 0; + w->sign = 0; + w->flags = 0; + return w; +} +EXPORT_SYMBOL_GPL(mpi_set_ui); + +MPI mpi_alloc_set_ui(unsigned long u) +{ + MPI w = mpi_alloc(1); + w->d[0] = u; + w->nlimbs = u ? 1 : 0; + w->sign = 0; + return w; +} + +/**************** + * Swap the value of A and B, when SWAP is 1. + * Leave the value when SWAP is 0. 
+ * This implementation should be constant-time regardless of SWAP. + */ +void mpi_swap_cond(MPI a, MPI b, unsigned long swap) +{ + mpi_size_t i; + mpi_size_t nlimbs; + mpi_limb_t mask = ((mpi_limb_t)0) - swap; + mpi_limb_t x; + + if (a->alloced > b->alloced) + nlimbs = b->alloced; + else + nlimbs = a->alloced; + if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) + return; + + for (i = 0; i < nlimbs; i++) { + x = mask & (a->d[i] ^ b->d[i]); + a->d[i] = a->d[i] ^ x; + b->d[i] = b->d[i] ^ x; + } + + x = mask & (a->nlimbs ^ b->nlimbs); + a->nlimbs = a->nlimbs ^ x; + b->nlimbs = b->nlimbs ^ x; + + x = mask & (a->sign ^ b->sign); + a->sign = a->sign ^ x; + b->sign = b->sign ^ x; +} + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL"); |