diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpv_sparc.c')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpv_sparc.c | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c new file mode 100644 index 0000000000..423311b65b --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparc.c @@ -0,0 +1,221 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "vis_proto.h" + +/***************************************************************/ + +typedef int t_s32; +typedef unsigned int t_u32; +#if defined(__sparcv9) +typedef long t_s64; +typedef unsigned long t_u64; +#else +typedef long long t_s64; +typedef unsigned long long t_u64; +#endif +typedef double t_d64; + +/***************************************************************/ + +typedef union { + t_d64 d64; + struct { + t_s32 i0; + t_s32 i1; + } i32s; +} d64_2_i32; + +/***************************************************************/ + +#define BUFF_SIZE 256 + +#define A_BITS 19 +#define A_MASK ((1 << A_BITS) - 1) + +/***************************************************************/ + +static t_u64 mask_cnst[] = { + 0x8000000080000000ull +}; + +/***************************************************************/ + +#define DEF_VARS(N) \ + t_d64 *py = (t_d64 *)y; \ + t_d64 mask = *((t_d64 *)mask_cnst); \ + t_d64 ca = (1u << 31) - 1; \ + t_d64 da = (t_d64)a; \ + t_s64 buff[N], s; \ + d64_2_i32 dy + +/***************************************************************/ + +#define MUL_U32_S64_2(i) \ + dy.d64 = vis_fxnor(mask, py[i]); \ + buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \ + buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da + +#define MUL_U32_S64_2_D(i) \ + dy.d64 = vis_fxnor(mask, py[i]); \ + d0 = ca - (t_d64)dy.i32s.i0; \ + d1 = ca - (t_d64)dy.i32s.i1; \ + buff[4 * (i)] = (t_s64)(d0 * da); \ + buff[4 * (i) + 1] = (t_s64)(d0 * db); \ + buff[4 * (i) + 2] = (t_s64)(d1 * da); \ + buff[4 * (i) + 3] = (t_s64)(d1 * db) + +/***************************************************************/ + +#define ADD_S64_U32(i) \ + s = buff[i] + x[i] + c; \ + z[i] = s; \ + c = (s >> 32) + +#define ADD_S64_U32_D(i) \ + s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \ + z[i] = s; \ + uc = ((t_u64)s >> 32) + +/***************************************************************/ + +#define MUL_U32_S64_8(i) \ + MUL_U32_S64_2(i); \ + MUL_U32_S64_2(i + 1); \ + MUL_U32_S64_2(i + 2); \ + MUL_U32_S64_2(i + 3) + +#define MUL_U32_S64_D_8(i) \ + MUL_U32_S64_2_D(i); \ + MUL_U32_S64_2_D(i + 1); \ + MUL_U32_S64_2_D(i + 2); \ + MUL_U32_S64_2_D(i + 3) + +/***************************************************************/ + +#define ADD_S64_U32_8(i) \ + ADD_S64_U32(i); \ + ADD_S64_U32(i + 1); \ + ADD_S64_U32(i + 2); \ + ADD_S64_U32(i + 3); \ + ADD_S64_U32(i + 4); \ + ADD_S64_U32(i + 5); \ + ADD_S64_U32(i + 6); \ + ADD_S64_U32(i + 7) + +#define ADD_S64_U32_D_8(i) \ + ADD_S64_U32_D(i); \ + ADD_S64_U32_D(i + 1); \ + ADD_S64_U32_D(i + 2); \ + ADD_S64_U32_D(i + 3); \ + ADD_S64_U32_D(i + 4); \ + ADD_S64_U32_D(i + 5); \ + ADD_S64_U32_D(i + 6); \ + ADD_S64_U32_D(i + 7) + +/***************************************************************/ + +t_u32 +mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a) +{ + if (a < (1 << A_BITS)) { + + if (n == 8) { + DEF_VARS(8); + t_s32 c = 0; + + MUL_U32_S64_8(0); + ADD_S64_U32_8(0); + + return c; + + } else if (n == 16) { + DEF_VARS(16); + t_s32 c = 0; + + MUL_U32_S64_8(0); + MUL_U32_S64_8(4); + ADD_S64_U32_8(0); + ADD_S64_U32_8(8); + + return c; + + } else { + DEF_VARS(BUFF_SIZE); + t_s32 i, c = 0; + +#pragma pipeloop(0) + for (i = 0; i < (n + 1) / 2; i++) { + MUL_U32_S64_2(i); + } + +#pragma pipeloop(0) + for (i = 0; i < n; i++) { + ADD_S64_U32(i); + } + + return c; + } + } else { + + if (n == 8) { + DEF_VARS(2 * 8); + t_d64 d0, d1, db; + t_u32 uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + + MUL_U32_S64_D_8(0); + ADD_S64_U32_D_8(0); + + return uc; + + } else if (n == 16) { + DEF_VARS(2 * 16); + t_d64 d0, d1, db; + t_u32 uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + + MUL_U32_S64_D_8(0); + MUL_U32_S64_D_8(4); + ADD_S64_U32_D_8(0); + ADD_S64_U32_D_8(8); + + return uc; + + } else { + DEF_VARS(2 * BUFF_SIZE); + t_d64 d0, d1, db; + t_u32 i, uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + +#pragma pipeloop(0) + for (i = 0; i < (n + 1) / 2; i++) { + MUL_U32_S64_2_D(i); + } + +#pragma pipeloop(0) + for (i = 0; i < n; i++) { + ADD_S64_U32_D(i); + } + + return uc; + } + } +} + +/***************************************************************/ + +t_u32 +mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a) +{ + return mul_add(x, x, y, n, a); +} + +/***************************************************************/ |