From a5f28e22cc28ca53d092f6545bc924ee43a8bbe4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 8 Apr 2024 17:18:54 +0200 Subject: Merging upstream version 115.9.0esr. Signed-off-by: Daniel Baumann --- security/nss/lib/freebl/mpi/mpi-priv.h | 3 + security/nss/lib/freebl/mpi/mpi.c | 300 +++++++++++++++++++++++++++++++-- security/nss/lib/freebl/mpi/mpi.h | 41 +++++ security/nss/lib/freebl/mpi/mpmontg.c | 29 ++-- 4 files changed, 345 insertions(+), 28 deletions(-) (limited to 'security/nss/lib/freebl/mpi') diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h index 9447a818f3..b4333fb6b4 100644 --- a/security/nss/lib/freebl/mpi/mpi-priv.h +++ b/security/nss/lib/freebl/mpi/mpi-priv.h @@ -204,6 +204,9 @@ void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c); +void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a, + mp_size a_len, mp_digit b, + mp_digit *c, mp_size c_len); void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs); diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c index 2e6cd84664..7749dc710f 100644 --- a/security/nss/lib/freebl/mpi/mpi.c +++ b/security/nss/lib/freebl/mpi/mpi.c @@ -10,6 +10,8 @@ #include "mpi-priv.h" #include "mplogic.h" +#include + #if defined(__arm__) && \ ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__)) /* 16-bit thumb or ARM v3 doesn't work inlined assember version */ @@ -802,15 +804,18 @@ CLEANUP: /* }}} */ -/* {{{ mp_mul(a, b, c) */ +/* {{{ s_mp_mulg(a, b, c) */ /* - mp_mul(a, b, c) + s_mp_mulg(a, b, c) - Compute c = a * b. All parameters may be identical. + Compute c = a * b. All parameters may be identical. if constantTime is set, + then the operations are done in constant time. The original is mostly + constant time as long as s_mpv_mul_d_add() is constant time. This is true + of the x86 assembler, as well as the current c code. */ mp_err -mp_mul(const mp_int *a, const mp_int *b, mp_int *c) +s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime) { mp_digit *pb; mp_int tmp; @@ -846,7 +851,14 @@ mp_mul(const mp_int *a, const mp_int *b, mp_int *c) goto CLEANUP; #ifdef NSS_USE_COMBA - if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { + /* comba isn't constant time because it clamps! If we cared + * (we needed a constant time version of multiply that was 'faster' + * we could easily pass constantTime down to the comba code and + * get it to skip the clamp... but here are assembler versions + * which add comba to platforms that can't compile the normal + * comba's imbedded assembler which would also need to change, so + * for now we just skip comba when we are running constant time. */ + if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { if (MP_USED(a) == 4) { s_mp_mul_comba_4(a, b, c); goto CLEANUP; @@ -876,13 +888,15 @@ mp_mul(const mp_int *a, const mp_int *b, mp_int *c) mp_digit b_i = *pb++; /* Inner product: Digits of a */ - if (b_i) + if (constantTime || b_i) s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); else MP_DIGIT(c, ib + useda) = b_i; } - s_mp_clamp(c); + if (!constantTime) { + s_mp_clamp(c); + } if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ) SIGN(c) = ZPOS; @@ -892,10 +906,54 @@ mp_mul(const mp_int *a, const mp_int *b, mp_int *c) CLEANUP: mp_clear(&tmp); return res; +} /* end smp_mulg() */ + +/* }}} */ + +/* {{{ mp_mul(a, b, c) */ + +/* + mp_mul(a, b, c) + + Compute c = a * b. All parameters may be identical. + */ + +mp_err +mp_mul(const mp_int *a, const mp_int *b, mp_int *c) +{ + return s_mp_mulg(a, b, c, 0); } /* end mp_mul() */ /* }}} */ +/* {{{ mp_mulCT(a, b, c) */ + +/* + mp_mulCT(a, b, c) + + Compute c = a * b. In constant time. Parameters may not be identical. + NOTE: a and b may be modified. + */ + +mp_err +mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize) +{ + mp_err res; + + /* make the multiply values fixed length so multiply + * doesn't leak the length. at this point all the + * values are blinded, but once we finish we want the + * output size to be hidden (so no clamping the out put) */ + MP_CHECKOK(s_mp_pad(a, setSize)); + MP_CHECKOK(s_mp_pad(b, setSize)); + MP_CHECKOK(s_mp_pad(c, 2 * setSize)); + MP_CHECKOK(s_mp_mulg(a, b, c, 1)); +CLEANUP: + return res; +} /* end mp_mulCT() */ + +/* }}} */ + /* {{{ mp_sqr(a, sqr) */ #if MP_SQUARE @@ -1268,6 +1326,138 @@ mp_mod(const mp_int *a, const mp_int *m, mp_int *c) /* }}} */ +/* {{{ s_mp_subCT_d(a, b, borrow, c) */ + +/* + s_mp_subCT_d(a, b, borrow, c) + + Compute c = (a -b) - subtract in constant time. returns borrow + */ +mp_digit +s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret) +{ + *ret = a - b - borrow; + return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow); +} /* s_mp_subCT_d() */ + +/* }}} */ + +/* {{{ mp_subCT(a, b, ret, borrow) */ + +/* return ret= a - b and borrow in borrow. done in constant time. + * b could be modified. + */ +mp_err +mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow) +{ + mp_size used_a = MP_USED(a); + mp_size i; + mp_err res; + + MP_CHECKOK(s_mp_pad(b, used_a)); + MP_CHECKOK(s_mp_pad(ret, used_a)); + *borrow = 0; + for (i = 0; i < used_a; i++) { + *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow, + &MP_DIGIT(ret, i)); + } + + res = MP_OKAY; +CLEANUP: + return res; +} /* end mp_subCT() */ + +/* }}} */ + +/* {{{ mp_selectCT(cond, a, b, ret) */ + +/* + * return ret= cond ? a : b; cond should be either 0 or 1 + */ +mp_err +mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret) +{ + mp_size used_a = MP_USED(a); + mp_err res; + mp_size i; + + cond *= MP_DIGIT_MAX; + + /* we currently require these to be equal on input, + * we could use pad to extend one of them, but that might + * leak data as it wouldn't be constant time */ + if (used_a != MP_USED(b)) { + return MP_BADARG; + } + + MP_CHECKOK(s_mp_pad(ret, used_a)); + for (i = 0; i < used_a; i++) { + MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i)); + } + res = MP_OKAY; +CLEANUP: + return res; +} /* end mp_selectCT() */ + +/* {{{ mp_reduceCT(a, m, c) */ + +/* + mp_reduceCT(a, m, c) + + Compute c = aR^-1 (mod m) in constant time. + input should be in montgomery form. If input is the + result of a montgomery multiply then out put will be + in mongomery form. + Result will be reduced to MP_USED(m), but not be + clamped. + */ + +mp_err +mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c) +{ + mp_size used_m = MP_USED(m); + mp_size used_c = used_m * 2 + 1; + mp_digit *m_digits, *c_digits; + mp_size i; + mp_digit borrow, carry; + mp_err res; + mp_int sub; + + MP_DIGITS(&sub) = 0; + MP_CHECKOK(mp_init_size(&sub, used_m)); + + if (a != c) { + MP_CHECKOK(mp_copy(a, c)); + } + MP_CHECKOK(s_mp_pad(c, used_c)); + m_digits = MP_DIGITS(m); + c_digits = MP_DIGITS(c); + for (i = 0; i < used_m; i++) { + mp_digit m_i = MP_DIGIT(c, i) * n0i; + s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--); + } + s_mp_rshd(c, used_m); + /* MP_USED(c) should be used_m+1 with the high word being any carry + * from the previous multiply, save that carry and drop the high + * word for the substraction below */ + carry = MP_DIGIT(c, used_m); + MP_DIGIT(c, used_m) = 0; + MP_USED(c) = used_m; + /* mp_subCT wants c and m to be the same size, we've already + * guarrenteed that in the previous statement, so mp_subCT won't actually + * modify m, so it's safe to recast */ + MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow)); + + /* we return c-m if c >= m no borrow or there was a borrow and a carry */ + MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c)); + res = MP_OKAY; +CLEANUP: + mp_clear(&sub); + return res; +} /* end mp_reduceCT() */ + +/* }}} */ + /* {{{ mp_mod_d(a, d, c) */ /* @@ -1384,6 +1574,37 @@ mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) /* }}} */ +/* {{{ mp_mulmontmodCT(a, b, m, c) */ + +/* + mp_mulmontmodCT(a, b, m, c) + + Compute c = (a * b) mod m in constant time wrt a and b. either a or b + should be in montgomery form and the output is native. If both a and b + are in montgomery form, then the output will also be in montgomery form + and can be recovered with an mp_reduceCT call. + NOTE: a and b may be modified. + */ + +mp_err +mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, + mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY) + return res; + + if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + /* {{{ mp_sqrmod(a, m, c) */ #if MP_SQUARE @@ -3941,14 +4162,62 @@ s_mp_mul(mp_int *a, const mp_int *b) a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \ a1b0 += a0b1; \ Phi += a1b0 >> MP_HALF_DIGIT_BIT; \ - if (a1b0 < a0b1) \ - Phi += MP_HALF_RADIX; \ + Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \ a1b0 <<= MP_HALF_DIGIT_BIT; \ Plo += a1b0; \ - if (Plo < a1b0) \ - ++Phi; \ + Phi += MP_CT_LTU(Plo, a1b0); \ + } +#endif + +/* Constant time version of s_mpv_mul_d_add_prop. + * Presently, this is only used by the Constant time Montgomery arithmetic code. */ +/* c += a * b */ +void +s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b, + mp_digit *c, mp_size c_len) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + c_len -= a_len; + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + + /* propagate the carry to the end, even if carry is zero */ + while (c_len--) { + mp_word w = (mp_word)*c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } +#else + mp_digit carry = 0; + c_len -= a_len; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + a1b1 += MP_CT_LTU(a0b0, carry); + a0b0 += a_i = *c; + a1b1 += MP_CT_LTU(a0b0, a_i); + + *c++ = a0b0; + carry = a1b1; + } + /* propagate the carry to the end, even if carry is zero */ + while (c_len--) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = MP_CT_LTU(carry, c_i); } #endif +} #if !defined(MP_ASSEMBLY_MULTIPLY) /* c = a * b */ @@ -3974,8 +4243,7 @@ s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) MP_MUL_DxD(a_i, b, a1b1, a0b0); a0b0 += carry; - if (a0b0 < carry) - ++a1b1; + a1b1 += MP_CT_LTU(a0b0, carry); *c++ = a0b0; carry = a1b1; } @@ -4007,11 +4275,9 @@ s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, MP_MUL_DxD(a_i, b, a1b1, a0b0); a0b0 += carry; - if (a0b0 < carry) - ++a1b1; + a1b1 += MP_CT_LTU(a0b0, carry); a0b0 += a_i = *c; - if (a0b0 < a_i) - ++a1b1; + a1b1 += MP_CT_LTU(a0b0, a_i); *c++ = a0b0; carry = a1b1; } diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h index 4ba9b6a4bf..dd129db0d6 100644 --- a/security/nss/lib/freebl/mpi/mpi.h +++ b/security/nss/lib/freebl/mpi/mpi.h @@ -150,6 +150,38 @@ typedef int mp_sword; /* This defines the maximum I/O base (minimum is 2) */ #define MP_MAX_RADIX 64 +/* Constant Time Macros on mp_digits */ +#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1))) +#define MP_CT_TRUE ((mp_digit)1) +#define MP_CT_FALSE ((mp_digit)0) + +/* basic zero and non zero tests */ +#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x))))) +#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x))))) + +/* basic constant-time helper macro for equalities and inequalities. + * The inequalities will produce incorrect results if + * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay + * within the range 0-MP_DIGIT_MAX/2. */ +#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b))) +#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b))) +#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a)) +#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b)) +#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b)) +#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b)) + +/* use constant time result to select a boolean value + * or an mp digit depending on the args */ +#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l)))) +#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r) /* mask, l and r are booleans */ +#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r) /*mask, l, and r are mp_digit */ + +/* full inequalities that work with full mp_digit values */ +#define MP_CT_OVERFLOW(a, b, c, d) \ + MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)), \ + (MP_CT_HIGH_TO_LOW(d)), c) +#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b) + typedef struct { mp_sign sign; /* sign of this quantity */ mp_size alloc; /* how many digits allocated */ @@ -190,7 +222,9 @@ mp_err mp_neg(const mp_int *a, mp_int *b); /* Full arithmetic */ mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c); mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow); mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize); #if MP_SQUARE mp_err mp_sqr(const mp_int *a, mp_int *b); #else @@ -217,6 +251,12 @@ mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c); #endif /* MP_MODARITH */ +/* montgomery math */ +mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont); +mp_digit mp_calculate_mont_n0i(const mp_int *N); +mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct); +mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c); + /* Comparisons */ int mp_cmp_z(const mp_int *a); int mp_cmp_d(const mp_int *a, mp_digit d); @@ -224,6 +264,7 @@ int mp_cmp(const mp_int *a, const mp_int *b); int mp_cmp_mag(const mp_int *a, const mp_int *b); int mp_isodd(const mp_int *a); int mp_iseven(const mp_int *a); +mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret); /* Number theoretic */ mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c); diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c index 36ae51b352..d7a9958672 100644 --- a/security/nss/lib/freebl/mpi/mpmontg.c +++ b/security/nss/lib/freebl/mpi/mpmontg.c @@ -129,20 +129,27 @@ CLEANUP: } #endif -STATIC mp_err -s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont) +mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont) { mp_err res; /* xMont = x * R mod N where N is modulus */ - MP_CHECKOK(mp_copy(x, xMont)); - MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */ - MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */ + if (x != xMont) { + MP_CHECKOK(mp_copy(x, xMont)); + } + MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */ + MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */ CLEANUP: return res; } +mp_digit +mp_calculate_mont_n0i(const mp_int *N) +{ + return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0)); +} + #ifdef MP_USING_MONT_MULF /* the floating point multiply is already cache safe, @@ -198,7 +205,7 @@ mp_exptmod_f(const mp_int *montBase, MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); mp_set(&accum1, 1); - MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); MP_CHECKOK(s_mp_pad(&accum1, nLen)); oddPowSize = 2 * nLen + 1; @@ -478,7 +485,7 @@ mp_exptmod_i(const mp_int *montBase, /* set accumulator to montgomery residue of 1 */ mp_set(&accum1, 1); - MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); pa1 = &accum1; pa2 = &accum2; @@ -867,7 +874,7 @@ mp_exptmod_safe_i(const mp_int *montBase, MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2)); MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2)); mp_set(&accum[0], 1); - MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0])); + MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0])); MP_CHECKOK(mp_copy(montBase, &accum[1])); SQR(montBase, &accum[2]); MUL_NOWEAVE(montBase, &accum[2], &accum[3]); @@ -886,7 +893,7 @@ mp_exptmod_safe_i(const mp_int *montBase, } else { if (first_window == 0) { mp_set(&accum1, 1); - MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); } else { /* assert first_window == 1? */ MP_CHECKOK(mp_copy(montBase, &accum1)); @@ -1057,9 +1064,9 @@ mp_exptmod(const mp_int *inBase, const mp_int *exponent, /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX ** where n0 = least significant mp_digit of N, the modulus. */ - mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0)); + mmm.n0prime = mp_calculate_mont_n0i(modulus); - MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase)); + MP_CHECKOK(mp_to_mont(base, modulus, &montBase)); bits_in_exponent = mpl_significant_bits(exponent); #ifdef MP_USING_CACHE_SAFE_MOD_EXP -- cgit v1.2.3