4 files changed, 345 insertions, 28 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h
index 9447a818f3..b4333fb6b4 100644
--- a/security/nss/lib/freebl/mpi/mpi-priv.h
+++ b/security/nss/lib/freebl/mpi/mpi-priv.h
@@ -204,6 +204,9 @@ void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
 void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
                                        mp_size a_len, mp_digit b,
                                        mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
+                                         mp_size a_len, mp_digit b,
+                                         mp_digit *c, mp_size c_len);
 void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
                                      mp_size a_len,
                                      mp_digit *sqrs);
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
index 2e6cd84664..7749dc710f 100644
--- a/security/nss/lib/freebl/mpi/mpi.c
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -10,6 +10,8 @@
 #include "mpi-priv.h"
 #include "mplogic.h"
 
+#include <assert.h>
+
 #if defined(__arm__) && \
     ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
 /* 16-bit thumb or ARM v3 doesn't work inlined assember version */
@@ -802,15 +804,18 @@ CLEANUP:
 
 /* }}} */
 
-/* {{{ mp_mul(a, b, c) */
+/* {{{ s_mp_mulg(a, b, c) */
 
 /*
-  mp_mul(a, b, c)
+  s_mp_mulg(a, b, c)
 
-  Compute c = a * b.  All parameters may be identical.
+  Compute c = a * b.  All parameters may be identical. if constantTime is set,
+  then the operations are done in constant time. The original is mostly
+  constant time as long as s_mpv_mul_d_add() is constant time. This is true
+  of the x86 assembler, as well as the current c code.
  */
 mp_err
-mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
 {
     mp_digit *pb;
     mp_int tmp;
@@ -846,7 +851,14 @@ mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
         goto CLEANUP;
 
 #ifdef NSS_USE_COMBA
-    if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
+    /* comba isn't constant time because it clamps! If we cared
+     * (we needed a constant time version of multiply that was 'faster'
+     * we could easily pass constantTime down to the comba code and
+     * get it to skip the clamp... but here are assembler versions
+     * which add comba to platforms that can't compile the normal
+     * comba's imbedded assembler which would also need to change, so
+     * for now we just skip comba when we are running constant time. */
+    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
         if (MP_USED(a) == 4) {
             s_mp_mul_comba_4(a, b, c);
             goto CLEANUP;
@@ -876,13 +888,15 @@ mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
         mp_digit b_i = *pb++;
 
         /* Inner product:  Digits of a */
-        if (b_i)
+        if (constantTime || b_i)
             s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
         else
             MP_DIGIT(c, ib + useda) = b_i;
     }
 
-    s_mp_clamp(c);
+    if (!constantTime) {
+        s_mp_clamp(c);
+    }
 
     if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
         SIGN(c) = ZPOS;
@@ -892,10 +906,54 @@ mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
 CLEANUP:
     mp_clear(&tmp);
     return res;
+} /* end smp_mulg() */
+
+/* }}} */
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+  mp_mul(a, b, c)
+
+  Compute c = a * b.  All parameters may be identical.
+ */
+
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    return s_mp_mulg(a, b, c, 0);
 } /* end mp_mul() */
 
 /* }}} */
 
+/* {{{ mp_mulCT(a, b, c) */
+
+/*
+  mp_mulCT(a, b, c)
+
+  Compute c = a * b. In constant time. Parameters may not be identical.
+  NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
+{
+    mp_err res;
+
+    /* make the multiply values fixed length so multiply
+     * doesn't leak the length. at this point all the
+     * values are blinded, but once we finish we want the
+     * output size to be hidden (so no clamping the out put) */
+    MP_CHECKOK(s_mp_pad(a, setSize));
+    MP_CHECKOK(s_mp_pad(b, setSize));
+    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
+    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
+CLEANUP:
+    return res;
+} /* end mp_mulCT() */
+
+/* }}} */
+
 /* {{{ mp_sqr(a, sqr) */
 
 #if MP_SQUARE
@@ -1268,6 +1326,138 @@ mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
 
 /* }}} */
 
+/* {{{ s_mp_subCT_d(a, b, borrow, c) */
+
+/*
+  s_mp_subCT_d(a, b, borrow, c)
+
+  Compute c = (a -b) - subtract in constant time. returns borrow
+ */
+mp_digit
+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
+{
+    *ret = a - b - borrow;
+    return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
+} /*  s_mp_subCT_d() */
+
+/* }}} */
+
+/* {{{ mp_subCT(a, b, ret, borrow) */
+
+/* return ret= a - b and borrow in borrow. done in constant time.
+ * b could be modified.
+ */
+mp_err
+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
+{
+    mp_size used_a = MP_USED(a);
+    mp_size i;
+    mp_err res;
+
+    MP_CHECKOK(s_mp_pad(b, used_a));
+    MP_CHECKOK(s_mp_pad(ret, used_a));
+    *borrow = 0;
+    for (i = 0; i < used_a; i++) {
+        *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
+                               &MP_DIGIT(ret, i));
+    }
+
+    res = MP_OKAY;
+CLEANUP:
+    return res;
+} /*  end mp_subCT() */
+
+/* }}} */
+
+/* {{{ mp_selectCT(cond, a, b, ret) */
+
+/*
+ * return ret= cond ? a : b; cond should be either 0 or 1
+ */
+mp_err
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
+{
+    mp_size used_a = MP_USED(a);
+    mp_err res;
+    mp_size i;
+
+    cond *= MP_DIGIT_MAX;
+
+    /* we currently require these to be equal on input,
+     * we could use pad to extend one of them, but that might
+     * leak data as it wouldn't be constant time */
+    if (used_a != MP_USED(b)) {
+        return MP_BADARG;
+    }
+
+    MP_CHECKOK(s_mp_pad(ret, used_a));
+    for (i = 0; i < used_a; i++) {
+        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
+    }
+    res = MP_OKAY;
+CLEANUP:
+    return res;
+} /* end mp_selectCT() */
+
+/* {{{ mp_reduceCT(a, m, c) */
+
+/*
+  mp_reduceCT(a, m, c)
+
+  Compute c = aR^-1 (mod m) in constant time.
+   input should be in montgomery form. If input is the
+   result of a montgomery multiply then out put will be
+   in mongomery form.
+   Result will be reduced to MP_USED(m), but not be
+   clamped.
+ */
+
+mp_err
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
+{
+    mp_size used_m = MP_USED(m);
+    mp_size used_c = used_m * 2 + 1;
+    mp_digit *m_digits, *c_digits;
+    mp_size i;
+    mp_digit borrow, carry;
+    mp_err res;
+    mp_int sub;
+
+    MP_DIGITS(&sub) = 0;
+    MP_CHECKOK(mp_init_size(&sub, used_m));
+
+    if (a != c) {
+        MP_CHECKOK(mp_copy(a, c));
+    }
+    MP_CHECKOK(s_mp_pad(c, used_c));
+    m_digits = MP_DIGITS(m);
+    c_digits = MP_DIGITS(c);
+    for (i = 0; i < used_m; i++) {
+        mp_digit m_i = MP_DIGIT(c, i) * n0i;
+        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
+    }
+    s_mp_rshd(c, used_m);
+    /* MP_USED(c) should be used_m+1 with the high word being any carry
+     * from the previous multiply, save that carry and drop the high
+     * word for the substraction below */
+    carry = MP_DIGIT(c, used_m);
+    MP_DIGIT(c, used_m) = 0;
+    MP_USED(c) = used_m;
+    /* mp_subCT wants c and m to be the same size, we've already
+     * guarrenteed that in the previous statement, so mp_subCT won't actually
+     * modify m, so it's safe to recast */
+    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
+
+    /* we return c-m if c >= m no borrow or there was a borrow and a carry */
+    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
+    res = MP_OKAY;
+CLEANUP:
+    mp_clear(&sub);
+    return res;
+} /* end mp_reduceCT() */
+
+/* }}} */
+
 /* {{{ mp_mod_d(a, d, c) */
 
 /*
@@ -1384,6 +1574,37 @@ mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
 
 /* }}} */
 
+/* {{{ mp_mulmontmodCT(a, b, m, c) */
+
+/*
+  mp_mulmontmodCT(a, b, m, c)
+
+  Compute c = (a * b) mod m in constant time wrt a and b. either a or b
+  should be in montgomery form and the output is native. If both a and b
+  are in montgomery form, then the output will also be in montgomery form
+  and can be recovered with an mp_reduceCT call.
+  NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
+                mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
+        return res;
+
+    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
+        return res;
+
+    return MP_OKAY;
+}
+
+/* }}} */
+
 /* {{{ mp_sqrmod(a, m, c) */
 
 #if MP_SQUARE
@@ -3941,14 +4162,62 @@ s_mp_mul(mp_int *a, const mp_int *b)
         a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
         a1b0 += a0b1;                                              \
         Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
-        if (a1b0 < a0b1)                                           \
-            Phi += MP_HALF_RADIX;                                  \
+        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;       \
         a1b0 <<= MP_HALF_DIGIT_BIT;                                \
         Plo += a1b0;                                               \
-        if (Plo < a1b0)                                            \
-            ++Phi;                                                 \
+        Phi += MP_CT_LTU(Plo, a1b0);                               \
+    }
+#endif
+
+/* Constant time version of s_mpv_mul_d_add_prop.
+ * Presently, this is only used by the Constant time Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
+                       mp_digit *c, mp_size c_len)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+    mp_digit d = 0;
+
+    c_len -= a_len;
+    /* Inner product:  Digits of a */
+    while (a_len--) {
+        mp_word w = ((mp_word)b * *a++) + *c + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+
+    /* propagate the carry to the end, even if carry is zero */
+    while (c_len--) {
+        mp_word w = (mp_word)*c + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+#else
+    mp_digit carry = 0;
+    c_len -= a_len;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1;
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        a1b1 += MP_CT_LTU(a0b0, carry);
+        a0b0 += a_i = *c;
+        a1b1 += MP_CT_LTU(a0b0, a_i);
+
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+    /* propagate the carry to the end, even if carry is zero */
+    while (c_len--) {
+        mp_digit c_i = *c;
+        carry += c_i;
+        *c++ = carry;
+        carry = MP_CT_LTU(carry, c_i);
     }
 #endif
+}
 
 #if !defined(MP_ASSEMBLY_MULTIPLY)
 /* c = a * b */
@@ -3974,8 +4243,7 @@ s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
         MP_MUL_DxD(a_i, b, a1b1, a0b0);
 
         a0b0 += carry;
-        if (a0b0 < carry)
-            ++a1b1;
+        a1b1 += MP_CT_LTU(a0b0, carry);
         *c++ = a0b0;
         carry = a1b1;
     }
@@ -4007,11 +4275,9 @@ s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
         MP_MUL_DxD(a_i, b, a1b1, a0b0);
 
         a0b0 += carry;
-        if (a0b0 < carry)
-            ++a1b1;
+        a1b1 += MP_CT_LTU(a0b0, carry);
         a0b0 += a_i = *c;
-        if (a0b0 < a_i)
-            ++a1b1;
+        a1b1 += MP_CT_LTU(a0b0, a_i);
         *c++ = a0b0;
         carry = a1b1;
     }
diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h
index 4ba9b6a4bf..dd129db0d6 100644
--- a/security/nss/lib/freebl/mpi/mpi.h
+++ b/security/nss/lib/freebl/mpi/mpi.h
@@ -150,6 +150,38 @@ typedef int mp_sword;
 /* This defines the maximum I/O base (minimum is 2)   */
 #define MP_MAX_RADIX 64
 
+/* Constant Time Macros on mp_digits */
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
+#define MP_CT_TRUE ((mp_digit)1)
+#define MP_CT_FALSE ((mp_digit)0)
+
+/* basic zero and non zero tests */
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+
+/* basic constant-time helper macro for equalities and inequalities.
+ * The inequalities will produce incorrect results if
+ * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay
+ * within the range 0-MP_DIGIT_MAX/2. */
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b)))
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b)))
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
+#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b))
+#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b))
+
+/* use constant time result to select a boolean value
+ * or an mp digit depending on the args */
+#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l))))
+#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r)      /* mask, l and r are booleans */
+#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r) /*mask, l, and r are mp_digit */
+
+/* full inequalities that work with full mp_digit values */
+#define MP_CT_OVERFLOW(a, b, c, d)           \
+    MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)), \
+               (MP_CT_HIGH_TO_LOW(d)), c)
+#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b)
+
 typedef struct {
     mp_sign sign;  /* sign of this quantity      */
     mp_size alloc; /* how many digits allocated  */
@@ -190,7 +222,9 @@ mp_err mp_neg(const mp_int *a, mp_int *b);
 /* Full arithmetic         */
 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
 mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
 #if MP_SQUARE
 mp_err mp_sqr(const mp_int *a, mp_int *b);
 #else
@@ -217,6 +251,12 @@ mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
 #endif /* MP_MODARITH */
 
+/* montgomery math */
+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
+mp_digit mp_calculate_mont_n0i(const mp_int *N);
+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
+
 /* Comparisons             */
 int mp_cmp_z(const mp_int *a);
 int mp_cmp_d(const mp_int *a, mp_digit d);
@@ -224,6 +264,7 @@ int mp_cmp(const mp_int *a, const mp_int *b);
 int mp_cmp_mag(const mp_int *a, const mp_int *b);
 int mp_isodd(const mp_int *a);
 int mp_iseven(const mp_int *a);
+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);
 
 /* Number theoretic        */
 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
index 36ae51b352..d7a9958672 100644
--- a/security/nss/lib/freebl/mpi/mpmontg.c
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -129,20 +129,27 @@ CLEANUP:
 }
 #endif
 
-STATIC
 mp_err
-s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
 {
     mp_err res;
 
     /* xMont = x * R mod N   where  N is modulus */
-    MP_CHECKOK(mp_copy(x, xMont));
-    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */
-    MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont));   /*         mod N */
+    if (x != xMont) {
+        MP_CHECKOK(mp_copy(x, xMont));
+    }
+    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */
+    MP_CHECKOK(mp_div(xMont, N, 0, xMont));   /*         mod N */
 CLEANUP:
     return res;
 }
 
+mp_digit
+mp_calculate_mont_n0i(const mp_int *N)
+{
+    return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0));
+}
+
 #ifdef MP_USING_MONT_MULF
 
 /* the floating point multiply is already cache safe,
@@ -198,7 +205,7 @@ mp_exptmod_f(const mp_int *montBase,
     MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
 
     mp_set(&accum1, 1);
-    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
     MP_CHECKOK(s_mp_pad(&accum1, nLen));
 
     oddPowSize = 2 * nLen + 1;
@@ -478,7 +485,7 @@ mp_exptmod_i(const mp_int *montBase,
 
     /* set accumulator to montgomery residue of 1 */
     mp_set(&accum1, 1);
-    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+    MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
     pa1 = &accum1;
     pa2 = &accum2;
 
@@ -867,7 +874,7 @@ mp_exptmod_safe_i(const mp_int *montBase,
         MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
         MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
         mp_set(&accum[0], 1);
-        MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
+        MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
         MP_CHECKOK(mp_copy(montBase, &accum[1]));
         SQR(montBase, &accum[2]);
         MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
@@ -886,7 +893,7 @@ mp_exptmod_safe_i(const mp_int *montBase,
     } else {
         if (first_window == 0) {
             mp_set(&accum1, 1);
-            MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+            MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
         } else {
             /* assert first_window == 1? */
             MP_CHECKOK(mp_copy(montBase, &accum1));
@@ -1057,9 +1064,9 @@ mp_exptmod(const mp_int *inBase, const mp_int *exponent,
     /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
     **        where n0 = least significant mp_digit of N, the modulus.
     */
-    mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
+    mmm.n0prime = mp_calculate_mont_n0i(modulus);
 
-    MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
+    MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
 
     bits_in_exponent = mpl_significant_bits(exponent);
 #ifdef MP_USING_CACHE_SAFE_MOD_EXP