summaryrefslogtreecommitdiffstats
path: root/security/nss/lib/freebl/mpi/mpvalpha.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--security/nss/lib/freebl/mpi/mpvalpha.c183
1 files changed, 183 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 0000000000..94e86eedb9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ Plo = asm("mulq %a0, %a1, %v0", a, b); \
+ Phi = asm("umulh %a0, %a1, %v0", a, b); \
+ }
+
+/* This is empty for the loop in s_mpv_mul_d */
+#define CARRY_ADD
+
+#define ONE_MUL \
+ a_i = *a++; \
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+ a0b0 += carry; \
+ if (a0b0 < carry) \
+ ++a1b1; \
+ CARRY_ADD \
+ *c++ = a0b0; \
+ carry = a1b1;
+
+#define FOUR_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL
+
+#define SIXTEEN_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL
+
+#define THIRTYTWO_MUL \
+ SIXTEEN_MUL \
+ SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL
+
+#define EXPAND_256(CALL) \
+ mp_digit carry = 0; \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ if (a_len & 255) { \
+ if (a_len & 1) { \
+ ONE_MUL \
+ } \
+ if (a_len & 2) { \
+ ONE_MUL \
+ ONE_MUL \
+ } \
+ if (a_len & 4) { \
+ FOUR_MUL \
+ } \
+ if (a_len & 8) { \
+ FOUR_MUL \
+ FOUR_MUL \
+ } \
+ if (a_len & 16) { \
+ SIXTEEN_MUL \
+ } \
+ if (a_len & 32) { \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 64) { \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 128) { \
+ ONETWENTYEIGHT_MUL \
+ } \
+ a_len = a_len & (-256); \
+ } \
+ if (a_len >= 256) { \
+ carry = CALL(a, a_len, b, c, carry); \
+ c += a_len; \
+ }
+
+#define FUNC_NAME(NAME) \
+ mp_digit NAME(const mp_digit *a, \
+ mp_size a_len, \
+ mp_digit b, mp_digit *c, \
+ mp_digit carry)
+
+#define DECLARE_MUL_256(FNAME) \
+ FUNC_NAME(FNAME) \
+ { \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ while (a_len) { \
+ ONETWENTYEIGHT_MUL \
+ ONETWENTYEIGHT_MUL \
+ a_len -= 256; \
+ } \
+ return carry; \
+ }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+ (admittedly) small number of tests (i.e., timetest) used to
+ measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+ the routine that uses it; this helps locality somewhat */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND)
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+#else
+ EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+ *c = carry;
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD \
+ a0b0 += a_i = *c; \
+ if (a0b0 < a_i) \
+ ++a1b1;
+
+/* Need forward declaration so it can be instantiated between the
+ two routines that use it; this helps locality somewhat */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256)
+ *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256)
+ while (carry) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i;
+ }
+}