diff options
Diffstat (limited to 'comm/third_party/botan/src/lib/math/mp/mp_asmi.h')
-rw-r--r-- | comm/third_party/botan/src/lib/math/mp/mp_asmi.h | 611 |
1 files changed, 611 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/math/mp/mp_asmi.h b/comm/third_party/botan/src/lib/math/mp/mp_asmi.h new file mode 100644 index 0000000000..e1518d51c7 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_asmi.h @@ -0,0 +1,611 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H_ +#define BOTAN_MP_ASM_INTERNAL_H_ + +#include <botan/internal/mp_madd.h> + +namespace Botan { + +#if defined(BOTAN_MP_USE_X86_32_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ + ASM("movl %[carry], 4*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("addl 4*" #INDEX "(%[z]),%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorl %[carry]") \ + CORE_CODE \ + ASM("sbbl %[carry],%[carry]") \ + ASM("negl %[carry]") + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ + ASM("movq %[carry], 8*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("addq 8*" #INDEX "(%[z]),%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorq %[carry]") \ + CORE_CODE \ + ASM("sbbq %[carry],%[carry]") \ + ASM("negq %[carry]") + +#endif + +#if defined(ADD_OR_SUBTRACT) + +#define ASM(x) x "\n\t" + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#endif + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word z = x + y; + word c1 = (z < x); + z += *carry; + *carry = c1 | (z < *carry); + return z; +#endif + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_add(x[0], y[0], &carry); + x[1] = word_add(x[1], y[1], &carry); + x[2] = word_add(x[2], y[2], &carry); + x[3] = word_add(x[3], y[3], &carry); + x[4] = word_add(x[4], y[4], &carry); + x[5] = word_add(x[5], y[5], &carry); + x[6] = word_add(x[6], y[6], &carry); + x[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#else + z[0] = word_add(x[0], y[0], &carry); + z[1] = word_add(x[1], y[1], &carry); + z[2] = word_add(x[2], y[2], &carry); + z[3] = word_add(x[3], y[3], &carry); + z[4] = word_add(x[4], y[4], &carry); + z[5] = word_add(x[5], y[5], &carry); + z[6] = word_add(x[6], y[6], &carry); + z[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word t0 = x - y; + word c1 = (t0 > x); + word z = t0 - *carry; + *carry = c1 | (z > t0); + return z; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_sub(x[0], y[0], &carry); + x[1] = word_sub(x[1], y[1], &carry); + x[2] = word_sub(x[2], y[2], &carry); + x[3] = word_sub(x[3], y[3], &carry); + x[4] = word_sub(x[4], y[4], &carry); + x[5] = word_sub(x[5], y[5], &carry); + x[6] = word_sub(x[6], y[6], &carry); + x[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_sub(y[0], x[0], &carry); + x[1] = word_sub(y[1], x[1], &carry); + x[2] = word_sub(y[2], x[2], &carry); + x[3] = word_sub(y[3], x[3], &carry); + x[4] = word_sub(y[4], x[4], &carry); + x[5] = word_sub(y[5], x[5], &carry); + x[6] = word_sub(y[6], x[6], &carry); + x[7] = word_sub(y[7], x[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#else + z[0] = word_sub(x[0], y[0], &carry); + z[1] = word_sub(x[1], y[1], &carry); + z[2] = word_sub(x[2], y[2], &carry); + z[3] = word_sub(x[3], y[3], &carry); + z[4] = word_sub(x[4], y[4], &carry); + z[5] = word_sub(x[5], y[5], &carry); + z[6] = word_sub(x[6], y[6], &carry); + z[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + x[0] = word_madd2(x[0], y, &carry); + x[1] = word_madd2(x[1], y, &carry); + x[2] = word_madd2(x[2], y, &carry); + x[3] = word_madd2(x[3], y, &carry); + x[4] = word_madd2(x[4], y, &carry); + x[5] = word_madd2(x[5], y, &carry); + x[6] = word_madd2(x[6], y, &carry); + x[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + z[0] = word_madd2(x[0], y, &carry); + z[1] = word_madd2(x[1], y, &carry); + z[2] = word_madd2(x[2], y, &carry); + z[3] = word_madd2(x[3], y, &carry); + z[4] = word_madd2(x[4], y, &carry); + z[5] = word_madd2(x[5], y, &carry); + z[6] = word_madd2(x[6], y, &carry); + z[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + z[0] = word_madd3(x[0], y, z[0], &carry); + z[1] = word_madd3(x[1], y, z[1], &carry); + z[2] = word_madd3(x[2], y, z[2], &carry); + z[3] = word_madd3(x[3], y, z[3], &carry); + z[4] = word_madd3(x[4], y, z[4], &carry); + z[5] = word_madd3(x[5], y, z[5], &carry); + z[6] = word_madd3(x[6], y, z[6], &carry); + z[7] = word_madd3(x[7], y, z[7], &carry); + return carry; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += x * y +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + word z0 = 0, z1 = 0; + + asm("mull %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addl %[z0],%[w0] + adcl %[z1],%[w1] + adcl $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + word z0 = 0, z1 = 0; + + asm("mulq %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addq %[z0],%[w0] + adcq %[z1],%[w1] + adcq $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = *w0; + *w0 = word_madd2(x, y, &carry); + *w1 += carry; + *w2 += (*w1 < carry); +#endif + } + +/* +* 3-word addition +* (w2,w1,w0) += x +*/ +inline void word3_add(word* w2, word* w1, word* w0, word x) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm(R"( + addl %[x],%[w0] + adcl $0,%[w1] + adcl $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm(R"( + addq %[x],%[w0] + adcq $0,%[w1] + adcq $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + *w0 += x; + word c1 = (*w0 < x); + *w1 += c1; + word c2 = (*w1 < c1); + *w2 += c2; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += 2 * x * y +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + + word z0 = 0, z1 = 0; + + asm("mull %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addl %[z0],%[w0] + adcl %[z1],%[w1] + adcl $0,%[w2] + + addl %[z0],%[w0] + adcl %[z1],%[w1] + adcl $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + word z0 = 0, z1 = 0; + + asm("mulq %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addq %[z0],%[w0] + adcq %[z1],%[w1] + adcq $0,%[w2] + + addq %[z0],%[w0] + adcq %[z1],%[w1] + adcq $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = 0; + x = word_madd2(x, y, &carry); + y = carry; + + word top = (y >> (BOTAN_MP_WORD_BITS-1)); + y <<= 1; + y |= (x >> (BOTAN_MP_WORD_BITS-1)); + x <<= 1; + + carry = 0; + *w0 = word_add(*w0, x, &carry); + *w1 = word_add(*w1, y, &carry); + *w2 = word_add(*w2, top, &carry); +#endif + } + +#if defined(ASM) + #undef ASM + #undef DO_8_TIMES + #undef ADD_OR_SUBTRACT + #undef ADDSUB2_OP + #undef ADDSUB3_OP + #undef LINMUL_OP + #undef MULADD_OP +#endif + +} + +#endif |