diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/third_party/botan/src/lib/math | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/third_party/botan/src/lib/math')
41 files changed, 14624 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/math/bigint/big_code.cpp b/comm/third_party/botan/src/lib/math/bigint/big_code.cpp new file mode 100644 index 0000000000..6eb27549e8 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/big_code.cpp @@ -0,0 +1,200 @@ +/* +* BigInt Encoding/Decoding +* (C) 1999-2010,2012,2019 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/bigint.h> +#include <botan/divide.h> +#include <botan/charset.h> +#include <botan/hex.h> + +namespace Botan { + +std::string BigInt::to_dec_string() const + { + BigInt copy = *this; + copy.set_sign(Positive); + + uint8_t remainder; + std::vector<uint8_t> digits; + + while(copy > 0) + { + ct_divide_u8(copy, 10, copy, remainder); + digits.push_back(remainder); + } + + std::string s; + + for(auto i = digits.rbegin(); i != digits.rend(); ++i) + { + s.push_back(Charset::digit2char(*i)); + } + + if(s.empty()) + s += "0"; + + return s; + } + +std::string BigInt::to_hex_string() const + { + const std::vector<uint8_t> bits = BigInt::encode(*this); + if(bits.empty()) + return "00"; + else + return hex_encode(bits); + } + +/* +* Encode a BigInt +*/ +void BigInt::encode(uint8_t output[], const BigInt& n, Base base) + { + secure_vector<uint8_t> enc = n.encode_locked(base); + copy_mem(output, enc.data(), enc.size()); + } + +namespace { + +std::vector<uint8_t> str_to_vector(const std::string& s) + { + std::vector<uint8_t> v(s.size()); + std::memcpy(v.data(), s.data(), s.size()); + return v; + } + +secure_vector<uint8_t> str_to_lvector(const std::string& s) + { + secure_vector<uint8_t> v(s.size()); + std::memcpy(v.data(), s.data(), s.size()); + return v; + } + +} + +/* +* Encode a BigInt +*/ +std::vector<uint8_t> BigInt::encode(const BigInt& n, Base base) + { + if(base == Binary) + return BigInt::encode(n); + else if(base == Hexadecimal) + return str_to_vector(n.to_hex_string()); + else if(base == Decimal) + return str_to_vector(n.to_dec_string()); + else + throw Invalid_Argument("Unknown BigInt encoding base"); + } + +/* +* Encode a BigInt +*/ +secure_vector<uint8_t> BigInt::encode_locked(const BigInt& n, Base base) + { + if(base == Binary) + return BigInt::encode_locked(n); + else if(base == Hexadecimal) + return str_to_lvector(n.to_hex_string()); + else if(base == Decimal) + return str_to_lvector(n.to_dec_string()); + else + throw Invalid_Argument("Unknown BigInt encoding base"); + } + +/* +* Encode a BigInt, with leading 0s if needed +*/ +secure_vector<uint8_t> BigInt::encode_1363(const BigInt& n, size_t bytes) + { + if(n.bytes() > bytes) + throw Encoding_Error("encode_1363: n is too large to encode properly"); + + secure_vector<uint8_t> output(bytes); + n.binary_encode(output.data(), output.size()); + return output; + } + +//static +void BigInt::encode_1363(uint8_t output[], size_t bytes, const BigInt& n) + { + if(n.bytes() > bytes) + throw Encoding_Error("encode_1363: n is too large to encode properly"); + + n.binary_encode(output, bytes); + } + +/* +* Encode two BigInt, with leading 0s if needed, and concatenate +*/ +secure_vector<uint8_t> BigInt::encode_fixed_length_int_pair(const BigInt& n1, const BigInt& n2, size_t bytes) + { + if(n1.bytes() > bytes || n2.bytes() > bytes) + throw Encoding_Error("encode_fixed_length_int_pair: values too large to encode properly"); + secure_vector<uint8_t> output(2 * bytes); + n1.binary_encode(output.data() , bytes); + n2.binary_encode(output.data() + bytes, bytes); + return output; + } + +/* +* Decode a BigInt +*/ +BigInt BigInt::decode(const uint8_t buf[], size_t length, Base base) + { + BigInt r; + if(base == Binary) + { + r.binary_decode(buf, length); + } + else if(base == Hexadecimal) + { + secure_vector<uint8_t> binary; + + if(length % 2) + { + // Handle lack of leading 0 + const char buf0_with_leading_0[2] = + { '0', static_cast<char>(buf[0]) }; + + binary = hex_decode_locked(buf0_with_leading_0, 2); + + binary += hex_decode_locked(cast_uint8_ptr_to_char(&buf[1]), + length - 1, + false); + } + else + binary = hex_decode_locked(cast_uint8_ptr_to_char(buf), + length, false); + + r.binary_decode(binary.data(), binary.size()); + } + else if(base == Decimal) + { + for(size_t i = 0; i != length; ++i) + { + if(Charset::is_space(buf[i])) + continue; + + if(!Charset::is_digit(buf[i])) + throw Invalid_Argument("BigInt::decode: " + "Invalid character in decimal input"); + + const uint8_t x = Charset::char2digit(buf[i]); + + if(x >= 10) + throw Invalid_Argument("BigInt: Invalid decimal string"); + + r *= 10; + r += x; + } + } + else + throw Invalid_Argument("Unknown BigInt decoding method"); + return r; + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/big_io.cpp b/comm/third_party/botan/src/lib/math/bigint/big_io.cpp new file mode 100644 index 0000000000..b31315eac4 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/big_io.cpp @@ -0,0 +1,62 @@ +/* +* BigInt Input/Output +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/bigint.h> +#include <istream> +#include <ostream> + +namespace Botan { + +/* +* Write the BigInt into a stream +*/ +std::ostream& operator<<(std::ostream& stream, const BigInt& n) + { + size_t base = 10; + if(stream.flags() & std::ios::hex) + base = 16; + if(stream.flags() & std::ios::oct) + throw Invalid_Argument("Octal output of BigInt not supported"); + + if(n == 0) + stream.write("0", 1); + else + { + if(n < 0) + stream.write("-", 1); + + std::string enc; + + if(base == 10) + enc = n.to_dec_string(); + else + enc = n.to_hex_string(); + + size_t skip = 0; + while(skip < enc.size() && enc[skip] == '0') + ++skip; + stream.write(&enc[skip], enc.size() - skip); + } + if(!stream.good()) + throw Stream_IO_Error("BigInt output operator has failed"); + return stream; + } + +/* +* Read the BigInt from a stream +*/ +std::istream& operator>>(std::istream& stream, BigInt& n) + { + std::string str; + std::getline(stream, str); + if(stream.bad() || (stream.fail() && !stream.eof())) + throw Stream_IO_Error("BigInt input operator has failed"); + n = BigInt(str); + return stream; + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/big_ops2.cpp b/comm/third_party/botan/src/lib/math/bigint/big_ops2.cpp new file mode 100644 index 0000000000..cc85f5e96d --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/big_ops2.cpp @@ -0,0 +1,314 @@ +/* +* (C) 1999-2007,2018 Jack Lloyd +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/bigint.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/bit_ops.h> +#include <algorithm> + +namespace Botan { + +BigInt& BigInt::add(const word y[], size_t y_words, Sign y_sign) + { + const size_t x_sw = sig_words(); + + grow_to(std::max(x_sw, y_words) + 1); + + if(sign() == y_sign) + { + bigint_add2(mutable_data(), size() - 1, y, y_words); + } + else + { + const int32_t relative_size = bigint_cmp(data(), x_sw, y, y_words); + + if(relative_size >= 0) + { + // *this >= y + bigint_sub2(mutable_data(), x_sw, y, y_words); + } + else + { + // *this < y + bigint_sub2_rev(mutable_data(), y, y_words); + } + + //this->sign_fixup(relative_size, y_sign); + if(relative_size < 0) + set_sign(y_sign); + else if(relative_size == 0) + set_sign(Positive); + } + + return (*this); + } + +BigInt& BigInt::mod_add(const BigInt& s, const BigInt& mod, secure_vector<word>& ws) + { + if(this->is_negative() || s.is_negative() || mod.is_negative()) + throw Invalid_Argument("BigInt::mod_add expects all arguments are positive"); + + BOTAN_DEBUG_ASSERT(*this < mod); + BOTAN_DEBUG_ASSERT(s < mod); + + /* + t + s or t + s - p == t - (p - s) + + So first compute ws = p - s + + Then compute t + s and t - ws + + If t - ws does not borrow, then that is the correct valued + */ + + const size_t mod_sw = mod.sig_words(); + BOTAN_ARG_CHECK(mod_sw > 0, "BigInt::mod_add modulus must be positive"); + + this->grow_to(mod_sw); + s.grow_to(mod_sw); + + // First mod_sw for p - s, 2*mod_sw for bigint_addsub workspace + if(ws.size() < 3*mod_sw) + ws.resize(3*mod_sw); + + word borrow = bigint_sub3(&ws[0], mod.data(), mod_sw, s.data(), mod_sw); + BOTAN_DEBUG_ASSERT(borrow == 0); + + // Compute t - ws + borrow = bigint_sub3(&ws[mod_sw], this->data(), mod_sw, &ws[0], mod_sw); + + // Compute t + s + bigint_add3_nc(&ws[mod_sw*2], this->data(), mod_sw, s.data(), mod_sw); + + CT::conditional_copy_mem(borrow, &ws[0], &ws[mod_sw*2], &ws[mod_sw], mod_sw); + set_words(&ws[0], mod_sw); + + return (*this); + } + +BigInt& BigInt::mod_sub(const BigInt& s, const BigInt& mod, secure_vector<word>& ws) + { + if(this->is_negative() || s.is_negative() || mod.is_negative()) + throw Invalid_Argument("BigInt::mod_sub expects all arguments are positive"); + + // We are assuming in this function that *this and s are no more than mod_sw words long + BOTAN_DEBUG_ASSERT(*this < mod); + BOTAN_DEBUG_ASSERT(s < mod); + + const size_t mod_sw = mod.sig_words(); + + this->grow_to(mod_sw); + s.grow_to(mod_sw); + + if(ws.size() < mod_sw) + ws.resize(mod_sw); + + if(mod_sw == 4) + bigint_mod_sub_n<4>(mutable_data(), s.data(), mod.data(), ws.data()); + else if(mod_sw == 6) + bigint_mod_sub_n<6>(mutable_data(), s.data(), mod.data(), ws.data()); + else + bigint_mod_sub(mutable_data(), s.data(), mod.data(), mod_sw, ws.data()); + + return (*this); + } + +BigInt& BigInt::mod_mul(uint8_t y, const BigInt& mod, secure_vector<word>& ws) + { + BOTAN_ARG_CHECK(this->is_negative() == false, "*this must be positive"); + BOTAN_ARG_CHECK(y < 16, "y too large"); + + BOTAN_DEBUG_ASSERT(*this < mod); + + *this *= static_cast<word>(y); + this->reduce_below(mod, ws); + return (*this); + } + +BigInt& BigInt::rev_sub(const word y[], size_t y_sw, secure_vector<word>& ws) + { + if(this->sign() != BigInt::Positive) + throw Invalid_State("BigInt::sub_rev requires this is positive"); + + const size_t x_sw = this->sig_words(); + + ws.resize(std::max(x_sw, y_sw)); + clear_mem(ws.data(), ws.size()); + + const int32_t relative_size = bigint_sub_abs(ws.data(), data(), x_sw, y, y_sw); + + this->cond_flip_sign(relative_size > 0); + this->swap_reg(ws); + + return (*this); + } + +/* +* Multiplication Operator +*/ +BigInt& BigInt::operator*=(const BigInt& y) + { + secure_vector<word> ws; + return this->mul(y, ws); + } + +BigInt& BigInt::mul(const BigInt& y, secure_vector<word>& ws) + { + const size_t x_sw = sig_words(); + const size_t y_sw = y.sig_words(); + set_sign((sign() == y.sign()) ? Positive : Negative); + + if(x_sw == 0 || y_sw == 0) + { + clear(); + set_sign(Positive); + } + else if(x_sw == 1 && y_sw) + { + grow_to(y_sw + 1); + bigint_linmul3(mutable_data(), y.data(), y_sw, word_at(0)); + } + else if(y_sw == 1 && x_sw) + { + word carry = bigint_linmul2(mutable_data(), x_sw, y.word_at(0)); + set_word_at(x_sw, carry); + } + else + { + const size_t new_size = x_sw + y_sw + 1; + ws.resize(new_size); + secure_vector<word> z_reg(new_size); + + bigint_mul(z_reg.data(), z_reg.size(), + data(), size(), x_sw, + y.data(), y.size(), y_sw, + ws.data(), ws.size()); + + this->swap_reg(z_reg); + } + + return (*this); + } + +BigInt& BigInt::square(secure_vector<word>& ws) + { + const size_t sw = sig_words(); + + secure_vector<word> z(2*sw); + ws.resize(z.size()); + + bigint_sqr(z.data(), z.size(), + data(), size(), sw, + ws.data(), ws.size()); + + swap_reg(z); + set_sign(BigInt::Positive); + + return (*this); + } + +BigInt& BigInt::operator*=(word y) + { + if(y == 0) + { + clear(); + set_sign(Positive); + } + + const word carry = bigint_linmul2(mutable_data(), size(), y); + set_word_at(size(), carry); + + return (*this); + } + +/* +* Division Operator +*/ +BigInt& BigInt::operator/=(const BigInt& y) + { + if(y.sig_words() == 1 && is_power_of_2(y.word_at(0))) + (*this) >>= (y.bits() - 1); + else + (*this) = (*this) / y; + return (*this); + } + +/* +* Modulo Operator +*/ +BigInt& BigInt::operator%=(const BigInt& mod) + { + return (*this = (*this) % mod); + } + +/* +* Modulo Operator +*/ +word BigInt::operator%=(word mod) + { + if(mod == 0) + throw BigInt::DivideByZero(); + + word remainder = 0; + + if(is_power_of_2(mod)) + { + remainder = (word_at(0) & (mod - 1)); + } + else + { + const size_t sw = sig_words(); + for(size_t i = sw; i > 0; --i) + remainder = bigint_modop(remainder, word_at(i-1), mod); + } + + if(remainder && sign() == BigInt::Negative) + remainder = mod - remainder; + + m_data.set_to_zero(); + m_data.set_word_at(0, remainder); + set_sign(BigInt::Positive); + return remainder; + } + +/* +* Left Shift Operator +*/ +BigInt& BigInt::operator<<=(size_t shift) + { + const size_t shift_words = shift / BOTAN_MP_WORD_BITS; + const size_t shift_bits = shift % BOTAN_MP_WORD_BITS; + const size_t size = sig_words(); + + const size_t bits_free = top_bits_free(); + + const size_t new_size = size + shift_words + (bits_free < shift_bits); + + m_data.grow_to(new_size); + + bigint_shl1(m_data.mutable_data(), new_size, size, shift_words, shift_bits); + + return (*this); + } + +/* +* Right Shift Operator +*/ +BigInt& BigInt::operator>>=(size_t shift) + { + const size_t shift_words = shift / BOTAN_MP_WORD_BITS; + const size_t shift_bits = shift % BOTAN_MP_WORD_BITS; + + bigint_shr1(m_data.mutable_data(), m_data.size(), shift_words, shift_bits); + + if(is_negative() && is_zero()) + set_sign(Positive); + + return (*this); + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/big_ops3.cpp b/comm/third_party/botan/src/lib/math/bigint/big_ops3.cpp new file mode 100644 index 0000000000..11804762b9 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/big_ops3.cpp @@ -0,0 +1,214 @@ +/* +* BigInt Binary Operators +* (C) 1999-2007,2018 Jack Lloyd +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/bigint.h> +#include <botan/divide.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/bit_ops.h> +#include <algorithm> + +namespace Botan { + +//static +BigInt BigInt::add2(const BigInt& x, const word y[], size_t y_words, BigInt::Sign y_sign) + { + const size_t x_sw = x.sig_words(); + + BigInt z(x.sign(), std::max(x_sw, y_words) + 1); + + if(x.sign() == y_sign) + { + bigint_add3(z.mutable_data(), x.data(), x_sw, y, y_words); + } + else + { + const int32_t relative_size = bigint_sub_abs(z.mutable_data(), x.data(), x_sw, y, y_words); + + //z.sign_fixup(relative_size, y_sign); + if(relative_size < 0) + z.set_sign(y_sign); + else if(relative_size == 0) + z.set_sign(BigInt::Positive); + } + + return z; + } + +/* +* Multiplication Operator +*/ +BigInt operator*(const BigInt& x, const BigInt& y) + { + const size_t x_sw = x.sig_words(); + const size_t y_sw = y.sig_words(); + + BigInt z(BigInt::Positive, x.size() + y.size()); + + if(x_sw == 1 && y_sw) + bigint_linmul3(z.mutable_data(), y.data(), y_sw, x.word_at(0)); + else if(y_sw == 1 && x_sw) + bigint_linmul3(z.mutable_data(), x.data(), x_sw, y.word_at(0)); + else if(x_sw && y_sw) + { + secure_vector<word> workspace(z.size()); + + bigint_mul(z.mutable_data(), z.size(), + x.data(), x.size(), x_sw, + y.data(), y.size(), y_sw, + workspace.data(), workspace.size()); + } + + z.cond_flip_sign(x_sw > 0 && y_sw > 0 && x.sign() != y.sign()); + + return z; + } + +/* +* Multiplication Operator +*/ +BigInt operator*(const BigInt& x, word y) + { + const size_t x_sw = x.sig_words(); + + BigInt z(BigInt::Positive, x_sw + 1); + + if(x_sw && y) + { + bigint_linmul3(z.mutable_data(), x.data(), x_sw, y); + z.set_sign(x.sign()); + } + + return z; + } + +/* +* Division Operator +*/ +BigInt operator/(const BigInt& x, const BigInt& y) + { + if(y.sig_words() == 1) + { + return x / y.word_at(0); + } + + BigInt q, r; + vartime_divide(x, y, q, r); + return q; + } + +/* +* Division Operator +*/ +BigInt operator/(const BigInt& x, word y) + { + if(y == 0) + throw BigInt::DivideByZero(); + else if(y == 1) + return x; + else if(y == 2) + return (x >> 1); + else if(y <= 255) + { + BigInt q; + uint8_t r; + ct_divide_u8(x, static_cast<uint8_t>(y), q, r); + return q; + } + + BigInt q, r; + vartime_divide(x, y, q, r); + return q; + } + +/* +* Modulo Operator +*/ +BigInt operator%(const BigInt& n, const BigInt& mod) + { + if(mod.is_zero()) + throw BigInt::DivideByZero(); + if(mod.is_negative()) + throw Invalid_Argument("BigInt::operator%: modulus must be > 0"); + if(n.is_positive() && mod.is_positive() && n < mod) + return n; + + if(mod.sig_words() == 1) + { + return n % mod.word_at(0); + } + + BigInt q, r; + vartime_divide(n, mod, q, r); + return r; + } + +/* +* Modulo Operator +*/ +word operator%(const BigInt& n, word mod) + { + if(mod == 0) + throw BigInt::DivideByZero(); + + if(mod == 1) + return 0; + + word remainder = 0; + + if(is_power_of_2(mod)) + { + remainder = (n.word_at(0) & (mod - 1)); + } + else + { + const size_t sw = n.sig_words(); + for(size_t i = sw; i > 0; --i) + { + remainder = bigint_modop(remainder, n.word_at(i-1), mod); + } + } + + if(remainder && n.sign() == BigInt::Negative) + return mod - remainder; + return remainder; + } + +/* +* Left Shift Operator +*/ +BigInt operator<<(const BigInt& x, size_t shift) + { + const size_t shift_words = shift / BOTAN_MP_WORD_BITS, + shift_bits = shift % BOTAN_MP_WORD_BITS; + + const size_t x_sw = x.sig_words(); + + BigInt y(x.sign(), x_sw + shift_words + (shift_bits ? 1 : 0)); + bigint_shl2(y.mutable_data(), x.data(), x_sw, shift_words, shift_bits); + return y; + } + +/* +* Right Shift Operator +*/ +BigInt operator>>(const BigInt& x, size_t shift) + { + const size_t shift_words = shift / BOTAN_MP_WORD_BITS; + const size_t shift_bits = shift % BOTAN_MP_WORD_BITS; + const size_t x_sw = x.sig_words(); + + BigInt y(x.sign(), x_sw - shift_words); + bigint_shr2(y.mutable_data(), x.data(), x_sw, shift_words, shift_bits); + + if(x.is_negative() && y.is_zero()) + y.set_sign(BigInt::Positive); + + return y; + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/big_rand.cpp b/comm/third_party/botan/src/lib/math/bigint/big_rand.cpp new file mode 100644 index 0000000000..dd4cb5eaba --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/big_rand.cpp @@ -0,0 +1,64 @@ +/* +* BigInt Random Generation +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/bigint.h> +#include <botan/rng.h> +#include <botan/internal/rounding.h> + +namespace Botan { + +/* +* Randomize this number +*/ +void BigInt::randomize(RandomNumberGenerator& rng, + size_t bitsize, bool set_high_bit) + { + set_sign(Positive); + + if(bitsize == 0) + { + clear(); + } + else + { + secure_vector<uint8_t> array = rng.random_vec(round_up(bitsize, 8) / 8); + + // Always cut unwanted bits + if(bitsize % 8) + array[0] &= 0xFF >> (8 - (bitsize % 8)); + + // Set the highest bit if wanted + if (set_high_bit) + array[0] |= 0x80 >> ((bitsize % 8) ? (8 - bitsize % 8) : 0); + + binary_decode(array); + } + } + +/* +* Generate a random integer within given range +*/ +BigInt BigInt::random_integer(RandomNumberGenerator& rng, + const BigInt& min, const BigInt& max) + { + if(min.is_negative() || max.is_negative() || max <= min) + throw Invalid_Argument("BigInt::random_integer invalid range"); + + BigInt r; + + const size_t bits = max.bits(); + + do + { + r.randomize(rng, bits, false); + } + while(r < min || r >= max); + + return r; + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/bigint.cpp b/comm/third_party/botan/src/lib/math/bigint/bigint.cpp new file mode 100644 index 0000000000..7bcbaf37f0 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/bigint.cpp @@ -0,0 +1,551 @@ +/* +* BigInt Base +* (C) 1999-2011,2012,2014,2019 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/bigint.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/rounding.h> +#include <botan/internal/bit_ops.h> +#include <botan/internal/ct_utils.h> +#include <botan/loadstor.h> + +namespace Botan { + +BigInt::BigInt(const word words[], size_t length) + { + m_data.set_words(words, length); + } + +/* +* Construct a BigInt from a regular number +*/ +BigInt::BigInt(uint64_t n) + { + if(n > 0) + { +#if BOTAN_MP_WORD_BITS == 32 + m_data.set_word_at(0, static_cast<word>(n)); + m_data.set_word_at(1, static_cast<word>(n >> 32)); +#else + m_data.set_word_at(0, n); +#endif + } + + } + +/* +* Construct a BigInt of the specified size +*/ +BigInt::BigInt(Sign s, size_t size) + { + m_data.set_size(size); + m_signedness = s; + } + +/* +* Construct a BigInt from a string +*/ +BigInt::BigInt(const std::string& str) + { + Base base = Decimal; + size_t markers = 0; + bool negative = false; + + if(str.length() > 0 && str[0] == '-') + { + markers += 1; + negative = true; + } + + if(str.length() > markers + 2 && str[markers ] == '0' && + str[markers + 1] == 'x') + { + markers += 2; + base = Hexadecimal; + } + + *this = decode(cast_char_ptr_to_uint8(str.data()) + markers, + str.length() - markers, base); + + if(negative) set_sign(Negative); + else set_sign(Positive); + } + +BigInt::BigInt(const uint8_t input[], size_t length) + { + binary_decode(input, length); + } + +/* +* Construct a BigInt from an encoded BigInt +*/ +BigInt::BigInt(const uint8_t input[], size_t length, Base base) + { + *this = decode(input, length, base); + } + +BigInt::BigInt(const uint8_t buf[], size_t length, size_t max_bits) + { + if(8 * length > max_bits) + length = (max_bits + 7) / 8; + + binary_decode(buf, length); + + if(8 * length > max_bits) + *this >>= (8 - (max_bits % 8)); + } + +/* +* Construct a BigInt from an encoded BigInt +*/ +BigInt::BigInt(RandomNumberGenerator& rng, size_t bits, bool set_high_bit) + { + randomize(rng, bits, set_high_bit); + } + +uint8_t BigInt::byte_at(size_t n) const + { + return get_byte(sizeof(word) - (n % sizeof(word)) - 1, + word_at(n / sizeof(word))); + } + +int32_t BigInt::cmp_word(word other) const + { + if(is_negative()) + return -1; // other is positive ... + + const size_t sw = this->sig_words(); + if(sw > 1) + return 1; // must be larger since other is just one word ... + + return bigint_cmp(this->data(), sw, &other, 1); + } + +/* +* Comparison Function +*/ +int32_t BigInt::cmp(const BigInt& other, bool check_signs) const + { + if(check_signs) + { + if(other.is_positive() && this->is_negative()) + return -1; + + if(other.is_negative() && this->is_positive()) + return 1; + + if(other.is_negative() && this->is_negative()) + return (-bigint_cmp(this->data(), this->size(), + other.data(), other.size())); + } + + return bigint_cmp(this->data(), this->size(), + other.data(), other.size()); + } + +bool BigInt::is_equal(const BigInt& other) const + { + if(this->sign() != other.sign()) + return false; + + return bigint_ct_is_eq(this->data(), this->sig_words(), + other.data(), other.sig_words()).is_set(); + } + +bool BigInt::is_less_than(const BigInt& other) const + { + if(this->is_negative() && other.is_positive()) + return true; + + if(this->is_positive() && other.is_negative()) + return false; + + if(other.is_negative() && this->is_negative()) + { + return bigint_ct_is_lt(other.data(), other.sig_words(), + this->data(), this->sig_words()).is_set(); + } + + return bigint_ct_is_lt(this->data(), this->sig_words(), + other.data(), other.sig_words()).is_set(); + } + +void BigInt::encode_words(word out[], size_t size) const + { + const size_t words = sig_words(); + + if(words > size) + throw Encoding_Error("BigInt::encode_words value too large to encode"); + + clear_mem(out, size); + copy_mem(out, data(), words); + } + +size_t BigInt::Data::calc_sig_words() const + { + const size_t sz = m_reg.size(); + size_t sig = sz; + + word sub = 1; + + for(size_t i = 0; i != sz; ++i) + { + const word w = m_reg[sz - i - 1]; + sub &= ct_is_zero(w); + sig -= sub; + } + + /* + * This depends on the data so is poisoned, but unpoison it here as + * later conditionals are made on the size. + */ + CT::unpoison(sig); + + return sig; + } + +/* +* Return bits {offset...offset+length} +*/ +uint32_t BigInt::get_substring(size_t offset, size_t length) const + { + if(length == 0 || length > 32) + throw Invalid_Argument("BigInt::get_substring invalid substring length"); + + const uint32_t mask = 0xFFFFFFFF >> (32 - length); + + const size_t word_offset = offset / BOTAN_MP_WORD_BITS; + const size_t wshift = (offset % BOTAN_MP_WORD_BITS); + + /* + * The substring is contained within one or at most two words. The + * offset and length are not secret, so we can perform conditional + * operations on those values. + */ + const word w0 = word_at(word_offset); + + if(wshift == 0 || (offset + length) / BOTAN_MP_WORD_BITS == word_offset) + { + return static_cast<uint32_t>(w0 >> wshift) & mask; + } + else + { + const word w1 = word_at(word_offset + 1); + return static_cast<uint32_t>((w0 >> wshift) | (w1 << (BOTAN_MP_WORD_BITS - wshift))) & mask; + } + } + +/* +* Convert this number to a uint32_t, if possible +*/ +uint32_t BigInt::to_u32bit() const + { + if(is_negative()) + throw Encoding_Error("BigInt::to_u32bit: Number is negative"); + if(bits() > 32) + throw Encoding_Error("BigInt::to_u32bit: Number is too big to convert"); + + uint32_t out = 0; + for(size_t i = 0; i != 4; ++i) + out = (out << 8) | byte_at(3-i); + return out; + } + +/* +* Set bit number n +*/ +void BigInt::conditionally_set_bit(size_t n, bool set_it) + { + const size_t which = n / BOTAN_MP_WORD_BITS; + const word mask = static_cast<word>(set_it) << (n % BOTAN_MP_WORD_BITS); + m_data.set_word_at(which, word_at(which) | mask); + } + +/* +* Clear bit number n +*/ +void BigInt::clear_bit(size_t n) + { + const size_t which = n / BOTAN_MP_WORD_BITS; + + if(which < size()) + { + const word mask = ~(static_cast<word>(1) << (n % BOTAN_MP_WORD_BITS)); + m_data.set_word_at(which, word_at(which) & mask); + } + } + +size_t BigInt::bytes() const + { + return round_up(bits(), 8) / 8; + } + +size_t BigInt::top_bits_free() const + { + const size_t words = sig_words(); + + const word top_word = word_at(words - 1); + const size_t bits_used = high_bit(top_word); + CT::unpoison(bits_used); + return BOTAN_MP_WORD_BITS - bits_used; + } + +size_t BigInt::bits() const + { + const size_t words = sig_words(); + + if(words == 0) + return 0; + + const size_t full_words = (words - 1) * BOTAN_MP_WORD_BITS; + const size_t top_bits = BOTAN_MP_WORD_BITS - top_bits_free(); + + return full_words + top_bits; + } + +/* +* Calcluate the size in a certain base +*/ +size_t BigInt::encoded_size(Base base) const + { + static const double LOG_2_BASE_10 = 0.30102999566; + + if(base == Binary) + return bytes(); + else if(base == Hexadecimal) + return 2*bytes(); + else if(base == Decimal) + return static_cast<size_t>((bits() * LOG_2_BASE_10) + 1); + else + throw Invalid_Argument("Unknown base for BigInt encoding"); + } + +/* +* Return the negation of this number +*/ +BigInt BigInt::operator-() const + { + BigInt x = (*this); + x.flip_sign(); + return x; + } + +size_t BigInt::reduce_below(const BigInt& p, secure_vector<word>& ws) + { + if(p.is_negative() || this->is_negative()) + throw Invalid_Argument("BigInt::reduce_below both values must be positive"); + + const size_t p_words = p.sig_words(); + + if(size() < p_words + 1) + grow_to(p_words + 1); + + if(ws.size() < p_words + 1) + ws.resize(p_words + 1); + + clear_mem(ws.data(), ws.size()); + + size_t reductions = 0; + + for(;;) + { + word borrow = bigint_sub3(ws.data(), data(), p_words + 1, p.data(), p_words); + if(borrow) + break; + + ++reductions; + swap_reg(ws); + } + + return reductions; + } + +void BigInt::ct_reduce_below(const BigInt& mod, secure_vector<word>& ws, size_t bound) + { + if(mod.is_negative() || this->is_negative()) + throw Invalid_Argument("BigInt::ct_reduce_below both values must be positive"); + + const size_t mod_words = mod.sig_words(); + + grow_to(mod_words); + + const size_t sz = size(); + + ws.resize(sz); + + clear_mem(ws.data(), sz); + + for(size_t i = 0; i != bound; ++i) + { + word borrow = bigint_sub3(ws.data(), data(), sz, mod.data(), mod_words); + + CT::Mask<word>::is_zero(borrow).select_n(mutable_data(), ws.data(), data(), sz); + } + } + +/* +* Return the absolute value of this number +*/ +BigInt BigInt::abs() const + { + BigInt x = (*this); + x.set_sign(Positive); + return x; + } + +void BigInt::binary_encode(uint8_t buf[]) const + { + this->binary_encode(buf, bytes()); + } + +/* +* Encode this number into bytes +*/ +void BigInt::binary_encode(uint8_t output[], size_t len) const + { + const size_t full_words = len / sizeof(word); + const size_t extra_bytes = len % sizeof(word); + + for(size_t i = 0; i != full_words; ++i) + { + const word w = word_at(i); + store_be(w, output + (len - (i+1)*sizeof(word))); + } + + if(extra_bytes > 0) + { + const word w = word_at(full_words); + + for(size_t i = 0; i != extra_bytes; ++i) + { + output[extra_bytes - i - 1] = get_byte(sizeof(word) - i - 1, w); + } + } + } + +/* +* Set this number to the value in buf +*/ +void BigInt::binary_decode(const uint8_t buf[], size_t length) + { + clear(); + + const size_t full_words = length / sizeof(word); + const size_t extra_bytes = length % sizeof(word); + + secure_vector<word> reg((round_up(full_words + (extra_bytes > 0 ? 1 : 0), 8))); + + for(size_t i = 0; i != full_words; ++i) + { + reg[i] = load_be<word>(buf + length - sizeof(word)*(i+1), 0); + } + + if(extra_bytes > 0) + { + for(size_t i = 0; i != extra_bytes; ++i) + reg[full_words] = (reg[full_words] << 8) | buf[i]; + } + + m_data.swap(reg); + } + +void BigInt::ct_cond_add(bool predicate, const BigInt& value) + { + if(this->is_negative() || value.is_negative()) + throw Invalid_Argument("BigInt::ct_cond_add requires both values to be positive"); + this->grow_to(1 + value.sig_words()); + + bigint_cnd_add(static_cast<word>(predicate), + this->mutable_data(), this->size(), + value.data(), value.sig_words()); + } + +void BigInt::ct_cond_swap(bool predicate, BigInt& other) + { + const size_t max_words = std::max(size(), other.size()); + grow_to(max_words); + other.grow_to(max_words); + + bigint_cnd_swap(predicate, this->mutable_data(), other.mutable_data(), max_words); + } + +void BigInt::cond_flip_sign(bool predicate) + { + // This code is assuming Negative == 0, Positive == 1 + + const auto mask = CT::Mask<uint8_t>::expand(predicate); + + const uint8_t current_sign = static_cast<uint8_t>(sign()); + + const uint8_t new_sign = mask.select(current_sign ^ 1, current_sign); + + set_sign(static_cast<Sign>(new_sign)); + } + +void BigInt::ct_cond_assign(bool predicate, const BigInt& other) + { + const size_t t_words = size(); + const size_t o_words = other.size(); + + if(o_words < t_words) + grow_to(o_words); + + const size_t r_words = std::max(t_words, o_words); + + const auto mask = CT::Mask<word>::expand(predicate); + + for(size_t i = 0; i != r_words; ++i) + { + const word o_word = other.word_at(i); + const word t_word = this->word_at(i); + this->set_word_at(i, mask.select(o_word, t_word)); + } + + const bool different_sign = sign() != other.sign(); + cond_flip_sign(predicate && different_sign); + } + +#if defined(BOTAN_HAS_VALGRIND) +void BigInt::const_time_poison() const + { + CT::poison(m_data.const_data(), m_data.size()); + } + +void BigInt::const_time_unpoison() const + { + CT::unpoison(m_data.const_data(), m_data.size()); + } +#endif + +void BigInt::const_time_lookup(secure_vector<word>& output, + const std::vector<BigInt>& vec, + size_t idx) + { + const size_t words = output.size(); + + clear_mem(output.data(), output.size()); + + CT::poison(&idx, sizeof(idx)); + + for(size_t i = 0; i != vec.size(); ++i) + { + BOTAN_ASSERT(vec[i].size() >= words, + "Word size as expected in const_time_lookup"); + + const auto mask = CT::Mask<word>::is_equal(i, idx); + + for(size_t w = 0; w != words; ++w) + { + const word viw = vec[i].word_at(w); + output[w] = mask.if_set_return(viw); + } + } + + CT::unpoison(idx); + CT::unpoison(output.data(), output.size()); + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/bigint.h b/comm/third_party/botan/src/lib/math/bigint/bigint.h new file mode 100644 index 0000000000..33e79d0122 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/bigint.h @@ -0,0 +1,1153 @@ +/* +* BigInt +* (C) 1999-2008,2012,2018 Jack Lloyd +* 2007 FlexSecure +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_BIGINT_H_ +#define BOTAN_BIGINT_H_ + +#include <botan/types.h> +#include <botan/secmem.h> +#include <botan/exceptn.h> +#include <iosfwd> + +namespace Botan { + +class RandomNumberGenerator; + +/** +* Arbitrary precision integer +*/ +class BOTAN_PUBLIC_API(2,0) BigInt final + { + public: + /** + * Base enumerator for encoding and decoding + */ + enum Base { Decimal = 10, Hexadecimal = 16, Binary = 256 }; + + /** + * Sign symbol definitions for positive and negative numbers + */ + enum Sign { Negative = 0, Positive = 1 }; + + /** + * DivideByZero Exception + * + * In a future release this exception will be removed and its usage + * replaced by Invalid_Argument + */ + class BOTAN_PUBLIC_API(2,0) DivideByZero final : public Invalid_Argument + { + public: + DivideByZero() : Invalid_Argument("BigInt divide by zero") {} + }; + + /** + * Create empty BigInt + */ + BigInt() = default; + + /** + * Create BigInt from 64 bit integer + * @param n initial value of this BigInt + */ + BigInt(uint64_t n); + + /** + * Copy Constructor + * @param other the BigInt to copy + */ + BigInt(const BigInt& other) = default; + + /** + * Create BigInt from a string. If the string starts with 0x the + * rest of the string will be interpreted as hexadecimal digits. + * Otherwise, it will be interpreted as a decimal number. + * + * @param str the string to parse for an integer value + */ + explicit BigInt(const std::string& str); + + /** + * Create a BigInt from an integer in a byte array + * @param buf the byte array holding the value + * @param length size of buf + */ + BigInt(const uint8_t buf[], size_t length); + + /** + * Create a BigInt from an integer in a byte array + * @param vec the byte vector holding the value + */ + template<typename Alloc> + explicit BigInt(const std::vector<uint8_t, Alloc>& vec) : BigInt(vec.data(), vec.size()) {} + + /** + * Create a BigInt from an integer in a byte array + * @param buf the byte array holding the value + * @param length size of buf + * @param base is the number base of the integer in buf + */ + BigInt(const uint8_t buf[], size_t length, Base base); + + /** + * Create a BigInt from an integer in a byte array + * @param buf the byte array holding the value + * @param length size of buf + * @param max_bits if the resulting integer is more than max_bits, + * it will be shifted so it is at most max_bits in length. + */ + BigInt(const uint8_t buf[], size_t length, size_t max_bits); + + /** + * Create a BigInt from an array of words + * @param words the words + * @param length number of words + */ + BigInt(const word words[], size_t length); + + /** + * \brief Create a random BigInt of the specified size + * + * @param rng random number generator + * @param bits size in bits + * @param set_high_bit if true, the highest bit is always set + * + * @see randomize + */ + BigInt(RandomNumberGenerator& rng, size_t bits, bool set_high_bit = true); + + /** + * Create BigInt of specified size, all zeros + * @param sign the sign + * @param n size of the internal register in words + */ + BigInt(Sign sign, size_t n); + + /** + * Move constructor + */ + BigInt(BigInt&& other) + { + this->swap(other); + } + + ~BigInt() { const_time_unpoison(); } + + /** + * Move assignment + */ + BigInt& operator=(BigInt&& other) + { + if(this != &other) + this->swap(other); + + return (*this); + } + + /** + * Copy assignment + */ + BigInt& operator=(const BigInt&) = default; + + /** + * Swap this value with another + * @param other BigInt to swap values with + */ + void swap(BigInt& other) + { + m_data.swap(other.m_data); + std::swap(m_signedness, other.m_signedness); + } + + void swap_reg(secure_vector<word>& reg) + { + m_data.swap(reg); + // sign left unchanged + } + + /** + * += operator + * @param y the BigInt to add to this + */ + BigInt& operator+=(const BigInt& y) + { + return add(y.data(), y.sig_words(), y.sign()); + } + + /** + * += operator + * @param y the word to add to this + */ + BigInt& operator+=(word y) + { + return add(&y, 1, Positive); + } + + /** + * -= operator + * @param y the BigInt to subtract from this + */ + BigInt& operator-=(const BigInt& y) + { + return sub(y.data(), y.sig_words(), y.sign()); + } + + /** + * -= operator + * @param y the word to subtract from this + */ + BigInt& operator-=(word y) + { + return sub(&y, 1, Positive); + } + + /** + * *= operator + * @param y the BigInt to multiply with this + */ + BigInt& operator*=(const BigInt& y); + + /** + * *= operator + * @param y the word to multiply with this + */ + BigInt& operator*=(word y); + + /** + * /= operator + * @param y the BigInt to divide this by + */ + BigInt& operator/=(const BigInt& y); + + /** + * Modulo operator + * @param y the modulus to reduce this by + */ + BigInt& operator%=(const BigInt& y); + + /** + * Modulo operator + * @param y the modulus (word) to reduce this by + */ + word operator%=(word y); + + /** + * Left shift operator + * @param shift the number of bits to shift this left by + */ + BigInt& operator<<=(size_t shift); + + /** + * Right shift operator + * @param shift the number of bits to shift this right by + */ + BigInt& operator>>=(size_t shift); + + /** + * Increment operator + */ + BigInt& operator++() { return (*this += 1); } + + /** + * Decrement operator + */ + BigInt& operator--() { return (*this -= 1); } + + /** + * Postfix increment operator + */ + BigInt operator++(int) { BigInt x = (*this); ++(*this); return x; } + + /** + * Postfix decrement operator + */ + BigInt operator--(int) { BigInt x = (*this); --(*this); return x; } + + /** + * Unary negation operator + * @return negative this + */ + BigInt operator-() const; + + /** + * ! operator + * @return true iff this is zero, otherwise false + */ + bool operator !() const { return (!is_nonzero()); } + + static BigInt add2(const BigInt& x, const word y[], size_t y_words, Sign y_sign); + + BigInt& add(const word y[], size_t y_words, Sign sign); + + BigInt& sub(const word y[], size_t y_words, Sign sign) + { + return add(y, y_words, sign == Positive ? Negative : Positive); + } + + /** + * Multiply this with y + * @param y the BigInt to multiply with this + * @param ws a temp workspace + */ + BigInt& mul(const BigInt& y, secure_vector<word>& ws); + + /** + * Square value of *this + * @param ws a temp workspace + */ + BigInt& square(secure_vector<word>& ws); + + /** + * Set *this to y - *this + * @param y the BigInt to subtract from as a sequence of words + * @param y_words length of y in words + * @param ws a temp workspace + */ + BigInt& rev_sub(const word y[], size_t y_words, secure_vector<word>& ws); + + /** + * Set *this to (*this + y) % mod + * This function assumes *this is >= 0 && < mod + * @param y the BigInt to add - assumed y >= 0 and y < mod + * @param mod the positive modulus + * @param ws a temp workspace + */ + BigInt& mod_add(const BigInt& y, const BigInt& mod, secure_vector<word>& ws); + + /** + * Set *this to (*this - y) % mod + * This function assumes *this is >= 0 && < mod + * @param y the BigInt to subtract - assumed y >= 0 and y < mod + * @param mod the positive modulus + * @param ws a temp workspace + */ + BigInt& mod_sub(const BigInt& y, const BigInt& mod, secure_vector<word>& ws); + + /** + * Set *this to (*this * y) % mod + * This function assumes *this is >= 0 && < mod + * y should be small, less than 16 + * @param y the small integer to multiply by + * @param mod the positive modulus + * @param ws a temp workspace + */ + BigInt& mod_mul(uint8_t y, const BigInt& mod, secure_vector<word>& ws); + + /** + * Return *this % mod + * + * Assumes that *this is (if anything) only slightly larger than + * mod and performs repeated subtractions. It should not be used if + * *this is much larger than mod, instead use modulo operator. + */ + size_t reduce_below(const BigInt& mod, secure_vector<word> &ws); + + /** + * Return *this % mod + * + * Assumes that *this is (if anything) only slightly larger than mod and + * performs repeated subtractions. It should not be used if *this is much + * larger than mod, instead use modulo operator. + * + * Performs exactly bound subtractions, so if *this is >= bound*mod then the + * result will not be fully reduced. If bound is zero, nothing happens. + */ + void ct_reduce_below(const BigInt& mod, secure_vector<word> &ws, size_t bound); + + /** + * Zeroize the BigInt. The size of the underlying register is not + * modified. + */ + void clear() { m_data.set_to_zero(); m_signedness = Positive; } + + /** + * Compare this to another BigInt + * @param n the BigInt value to compare with + * @param check_signs include sign in comparison? + * @result if (this<n) return -1, if (this>n) return 1, if both + * values are identical return 0 [like Perl's <=> operator] + */ + int32_t cmp(const BigInt& n, bool check_signs = true) const; + + /** + * Compare this to another BigInt + * @param n the BigInt value to compare with + * @result true if this == n or false otherwise + */ + bool is_equal(const BigInt& n) const; + + /** + * Compare this to another BigInt + * @param n the BigInt value to compare with + * @result true if this < n or false otherwise + */ + bool is_less_than(const BigInt& n) const; + + /** + * Compare this to an integer + * @param n the value to compare with + * @result if (this<n) return -1, if (this>n) return 1, if both + * values are identical return 0 [like Perl's <=> operator] + */ + int32_t cmp_word(word n) const; + + /** + * Test if the integer has an even value + * @result true if the integer is even, false otherwise + */ + bool is_even() const { return (get_bit(0) == 0); } + + /** + * Test if the integer has an odd value + * @result true if the integer is odd, false otherwise + */ + bool is_odd() const { return (get_bit(0) == 1); } + + /** + * Test if the integer is not zero + * @result true if the integer is non-zero, false otherwise + */ + bool is_nonzero() const { return (!is_zero()); } + + /** + * Test if the integer is zero + * @result true if the integer is zero, false otherwise + */ + bool is_zero() const + { + return (sig_words() == 0); + } + + /** + * Set bit at specified position + * @param n bit position to set + */ + void set_bit(size_t n) + { + conditionally_set_bit(n, true); + } + + /** + * Conditionally set bit at specified position. Note if set_it is + * false, nothing happens, and if the bit is already set, it + * remains set. + * + * @param n bit position to set + * @param set_it if the bit should be set + */ + void conditionally_set_bit(size_t n, bool set_it); + + /** + * Clear bit at specified position + * @param n bit position to clear + */ + void clear_bit(size_t n); + + /** + * Clear all but the lowest n bits + * @param n amount of bits to keep + */ + void mask_bits(size_t n) + { + m_data.mask_bits(n); + } + + /** + * Return bit value at specified position + * @param n the bit offset to test + * @result true, if the bit at position n is set, false otherwise + */ + bool get_bit(size_t n) const + { + return ((word_at(n / BOTAN_MP_WORD_BITS) >> (n % BOTAN_MP_WORD_BITS)) & 1); + } + + /** + * Return (a maximum of) 32 bits of the complete value + * @param offset the offset to start extracting + * @param length amount of bits to extract (starting at offset) + * @result the integer extracted from the register starting at + * offset with specified length + */ + uint32_t get_substring(size_t offset, size_t length) const; + + /** + * Convert this value into a uint32_t, if it is in the range + * [0 ... 2**32-1], or otherwise throw an exception. + * @result the value as a uint32_t if conversion is possible + */ + uint32_t to_u32bit() const; + + /** + * Convert this value to a decimal string. + * Warning: decimal conversions are relatively slow + */ + std::string to_dec_string() const; + + /** + * Convert this value to a hexadecimal string. + */ + std::string to_hex_string() const; + + /** + * @param n the offset to get a byte from + * @result byte at offset n + */ + uint8_t byte_at(size_t n) const; + + /** + * Return the word at a specified position of the internal register + * @param n position in the register + * @return value at position n + */ + word word_at(size_t n) const + { + return m_data.get_word_at(n); + } + + void set_word_at(size_t i, word w) + { + m_data.set_word_at(i, w); + } + + void set_words(const word w[], size_t len) + { + m_data.set_words(w, len); + } + + /** + * Tests if the sign of the integer is negative + * @result true, iff the integer has a negative sign + */ + bool is_negative() const { return (sign() == Negative); } + + /** + * Tests if the sign of the integer is positive + * @result true, iff the integer has a positive sign + */ + bool is_positive() const { return (sign() == Positive); } + + /** + * Return the sign of the integer + * @result the sign of the integer + */ + Sign sign() const { return (m_signedness); } + + /** + * @result the opposite sign of the represented integer value + */ + Sign reverse_sign() const + { + if(sign() == Positive) + return Negative; + return Positive; + } + + /** + * Flip the sign of this BigInt + */ + void flip_sign() + { + set_sign(reverse_sign()); + } + + /** + * Set sign of the integer + * @param sign new Sign to set + */ + void set_sign(Sign sign) + { + if(sign == Negative && is_zero()) + sign = Positive; + + m_signedness = sign; + } + + /** + * @result absolute (positive) value of this + */ + BigInt abs() const; + + /** + * Give size of internal register + * @result size of internal register in words + */ + size_t size() const { return m_data.size(); } + + /** + * Return how many words we need to hold this value + * @result significant words of the represented integer value + */ + size_t sig_words() const + { + return m_data.sig_words(); + } + + /** + * Give byte length of the integer + * @result byte length of the represented integer value + */ + size_t bytes() const; + + /** + * Get the bit length of the integer + * @result bit length of the represented integer value + */ + size_t bits() const; + + /** + * Get the number of high bits unset in the top (allocated) word + * of this integer. Returns BOTAN_MP_WORD_BITS only iff *this is + * zero. Ignores sign. + */ + size_t top_bits_free() const; + + /** + * Return a mutable pointer to the register + * @result a pointer to the start of the internal register + */ + word* mutable_data() { return m_data.mutable_data(); } + + /** + * Return a const pointer to the register + * @result a pointer to the start of the internal register + */ + const word* data() const { return m_data.const_data(); } + + /** + * Don't use this function in application code + */ + secure_vector<word>& get_word_vector() { return m_data.mutable_vector(); } + + /** + * Don't use this function in application code + */ + const secure_vector<word>& get_word_vector() const { return m_data.const_vector(); } + + /** + * Increase internal register buffer to at least n words + * @param n new size of register + */ + void grow_to(size_t n) const { m_data.grow_to(n); } + + /** + * Resize the vector to the minimum word size to hold the integer, or + * min_size words, whichever is larger + */ + void BOTAN_DEPRECATED("Use resize if required") shrink_to_fit(size_t min_size = 0) + { + m_data.shrink_to_fit(min_size); + } + + void resize(size_t s) { m_data.resize(s); } + + /** + * Fill BigInt with a random number with size of bitsize + * + * If \p set_high_bit is true, the highest bit will be set, which causes + * the entropy to be \a bits-1. Otherwise the highest bit is randomly chosen + * by the rng, causing the entropy to be \a bits. + * + * @param rng the random number generator to use + * @param bitsize number of bits the created random value should have + * @param set_high_bit if true, the highest bit is always set + */ + void randomize(RandomNumberGenerator& rng, size_t bitsize, bool set_high_bit = true); + + /** + * Store BigInt-value in a given byte array + * @param buf destination byte array for the integer value + */ + void binary_encode(uint8_t buf[]) const; + + /** + * Store BigInt-value in a given byte array. If len is less than + * the size of the value, then it will be truncated. If len is + * greater than the size of the value, it will be zero-padded. + * If len exactly equals this->bytes(), this function behaves identically + * to binary_encode. + * + * @param buf destination byte array for the integer value + * @param len how many bytes to write + */ + void binary_encode(uint8_t buf[], size_t len) const; + + /** + * Read integer value from a byte array with given size + * @param buf byte array buffer containing the integer + * @param length size of buf + */ + void binary_decode(const uint8_t buf[], size_t length); + + /** + * Read integer value from a byte vector + * @param buf the vector to load from + */ + template<typename Alloc> + void binary_decode(const std::vector<uint8_t, Alloc>& buf) + { + binary_decode(buf.data(), buf.size()); + } + + /** + * @param base the base to measure the size for + * @return size of this integer in base base + * + * Deprecated. This is only needed when using the `encode` and + * `encode_locked` functions, which are also deprecated. + */ + BOTAN_DEPRECATED("See comments on declaration") + size_t encoded_size(Base base = Binary) const; + + /** + * Place the value into out, zero-padding up to size words + * Throw if *this cannot be represented in size words + */ + void encode_words(word out[], size_t size) const; + + /** + * If predicate is true assign other to *this + * Uses a masked operation to avoid side channels + */ + void ct_cond_assign(bool predicate, const BigInt& other); + + /** + * If predicate is true swap *this and other + * Uses a masked operation to avoid side channels + */ + void ct_cond_swap(bool predicate, BigInt& other); + + /** + * If predicate is true add value to *this + */ + void ct_cond_add(bool predicate, const BigInt& value); + + /** + * If predicate is true flip the sign of *this + */ + void cond_flip_sign(bool predicate); + +#if defined(BOTAN_HAS_VALGRIND) + void const_time_poison() const; + void const_time_unpoison() const; +#else + void const_time_poison() const {} + void const_time_unpoison() const {} +#endif + + /** + * @param rng a random number generator + * @param min the minimum value (must be non-negative) + * @param max the maximum value (must be non-negative and > min) + * @return random integer in [min,max) + */ + static BigInt random_integer(RandomNumberGenerator& rng, + const BigInt& min, + const BigInt& max); + + /** + * Create a power of two + * @param n the power of two to create + * @return bigint representing 2^n + */ + static BigInt power_of_2(size_t n) + { + BigInt b; + b.set_bit(n); + return b; + } + + /** + * Encode the integer value from a BigInt to a std::vector of bytes + * @param n the BigInt to use as integer source + * @result secure_vector of bytes containing the bytes of the integer + */ + static std::vector<uint8_t> encode(const BigInt& n) + { + std::vector<uint8_t> output(n.bytes()); + n.binary_encode(output.data()); + return output; + } + + /** + * Encode the integer value from a BigInt to a secure_vector of bytes + * @param n the BigInt to use as integer source + * @result secure_vector of bytes containing the bytes of the integer + */ + static secure_vector<uint8_t> encode_locked(const BigInt& n) + { + secure_vector<uint8_t> output(n.bytes()); + n.binary_encode(output.data()); + return output; + } + + /** + * Encode the integer value from a BigInt to a byte array + * @param buf destination byte array for the encoded integer + * @param n the BigInt to use as integer source + */ + static BOTAN_DEPRECATED("Use n.binary_encode") void encode(uint8_t buf[], const BigInt& n) + { + n.binary_encode(buf); + } + + /** + * Create a BigInt from an integer in a byte array + * @param buf the binary value to load + * @param length size of buf + * @result BigInt representing the integer in the byte array + */ + static BigInt decode(const uint8_t buf[], size_t length) + { + return BigInt(buf, length); + } + + /** + * Create a BigInt from an integer in a byte array + * @param buf the binary value to load + * @result BigInt representing the integer in the byte array + */ + template<typename Alloc> + static BigInt decode(const std::vector<uint8_t, Alloc>& buf) + { + return BigInt(buf); + } + + /** + * Encode the integer value from a BigInt to a std::vector of bytes + * @param n the BigInt to use as integer source + * @param base number-base of resulting byte array representation + * @result secure_vector of bytes containing the integer with given base + * + * Deprecated. If you need Binary, call the version of encode that doesn't + * take a Base. If you need Hex or Decimal output, use to_hex_string or + * to_dec_string resp. + */ + BOTAN_DEPRECATED("See comments on declaration") + static std::vector<uint8_t> encode(const BigInt& n, Base base); + + /** + * Encode the integer value from a BigInt to a secure_vector of bytes + * @param n the BigInt to use as integer source + * @param base number-base of resulting byte array representation + * @result secure_vector of bytes containing the integer with given base + * + * Deprecated. If you need Binary, call the version of encode_locked that + * doesn't take a Base. If you need Hex or Decimal output, use to_hex_string + * or to_dec_string resp. + */ + BOTAN_DEPRECATED("See comments on declaration") + static secure_vector<uint8_t> encode_locked(const BigInt& n, + Base base); + + /** + * Encode the integer value from a BigInt to a byte array + * @param buf destination byte array for the encoded integer + * value with given base + * @param n the BigInt to use as integer source + * @param base number-base of resulting byte array representation + * + * Deprecated. If you need Binary, call binary_encode. If you need + * Hex or Decimal output, use to_hex_string or to_dec_string resp. + */ + BOTAN_DEPRECATED("See comments on declaration") + static void encode(uint8_t buf[], const BigInt& n, Base base); + + /** + * Create a BigInt from an integer in a byte array + * @param buf the binary value to load + * @param length size of buf + * @param base number-base of the integer in buf + * @result BigInt representing the integer in the byte array + */ + static BigInt decode(const uint8_t buf[], size_t length, + Base base); + + /** + * Create a BigInt from an integer in a byte array + * @param buf the binary value to load + * @param base number-base of the integer in buf + * @result BigInt representing the integer in the byte array + */ + template<typename Alloc> + static BigInt decode(const std::vector<uint8_t, Alloc>& buf, Base base) + { + if(base == Binary) + return BigInt(buf); + return BigInt::decode(buf.data(), buf.size(), base); + } + + /** + * Encode a BigInt to a byte array according to IEEE 1363 + * @param n the BigInt to encode + * @param bytes the length of the resulting secure_vector<uint8_t> + * @result a secure_vector<uint8_t> containing the encoded BigInt + */ + static secure_vector<uint8_t> encode_1363(const BigInt& n, size_t bytes); + + static void encode_1363(uint8_t out[], size_t bytes, const BigInt& n); + + /** + * Encode two BigInt to a byte array according to IEEE 1363 + * @param n1 the first BigInt to encode + * @param n2 the second BigInt to encode + * @param bytes the length of the encoding of each single BigInt + * @result a secure_vector<uint8_t> containing the concatenation of the two encoded BigInt + */ + static secure_vector<uint8_t> encode_fixed_length_int_pair(const BigInt& n1, const BigInt& n2, size_t bytes); + + /** + * Set output = vec[idx].m_reg in constant time + * + * All elements of vec must have the same size, and output must be + * pre-allocated with the same size. + */ + static void BOTAN_DEPRECATED("No longer in use") const_time_lookup( + secure_vector<word>& output, + const std::vector<BigInt>& vec, + size_t idx); + + private: + + class Data + { + public: + word* mutable_data() + { + invalidate_sig_words(); + return m_reg.data(); + } + + const word* const_data() const + { + return m_reg.data(); + } + + secure_vector<word>& mutable_vector() + { + invalidate_sig_words(); + return m_reg; + } + + const secure_vector<word>& const_vector() const + { + return m_reg; + } + + word get_word_at(size_t n) const + { + if(n < m_reg.size()) + return m_reg[n]; + return 0; + } + + void set_word_at(size_t i, word w) + { + invalidate_sig_words(); + if(i >= m_reg.size()) + { + if(w == 0) + return; + grow_to(i + 1); + } + m_reg[i] = w; + } + + void set_words(const word w[], size_t len) + { + invalidate_sig_words(); + m_reg.assign(w, w + len); + } + + void set_to_zero() + { + m_reg.resize(m_reg.capacity()); + clear_mem(m_reg.data(), m_reg.size()); + m_sig_words = 0; + } + + void set_size(size_t s) + { + invalidate_sig_words(); + clear_mem(m_reg.data(), m_reg.size()); + m_reg.resize(s + (8 - (s % 8))); + } + + void mask_bits(size_t n) + { + if(n == 0) { return set_to_zero(); } + + const size_t top_word = n / BOTAN_MP_WORD_BITS; + + // if(top_word < sig_words()) ? + if(top_word < size()) + { + const word mask = (static_cast<word>(1) << (n % BOTAN_MP_WORD_BITS)) - 1; + const size_t len = size() - (top_word + 1); + if(len > 0) + { + clear_mem(&m_reg[top_word+1], len); + } + m_reg[top_word] &= mask; + invalidate_sig_words(); + } + } + + void grow_to(size_t n) const + { + if(n > size()) + { + if(n <= m_reg.capacity()) + m_reg.resize(n); + else + m_reg.resize(n + (8 - (n % 8))); + } + } + + size_t size() const { return m_reg.size(); } + + void shrink_to_fit(size_t min_size = 0) + { + const size_t words = std::max(min_size, sig_words()); + m_reg.resize(words); + } + + void resize(size_t s) + { + m_reg.resize(s); + } + + void swap(Data& other) + { + m_reg.swap(other.m_reg); + std::swap(m_sig_words, other.m_sig_words); + } + + void swap(secure_vector<word>& reg) + { + m_reg.swap(reg); + invalidate_sig_words(); + } + + void invalidate_sig_words() const + { + m_sig_words = sig_words_npos; + } + + size_t sig_words() const + { + if(m_sig_words == sig_words_npos) + { + m_sig_words = calc_sig_words(); + } + else + { + BOTAN_DEBUG_ASSERT(m_sig_words == calc_sig_words()); + } + return m_sig_words; + } + private: + static const size_t sig_words_npos = static_cast<size_t>(-1); + + size_t calc_sig_words() const; + + mutable secure_vector<word> m_reg; + mutable size_t m_sig_words = sig_words_npos; + }; + + Data m_data; + Sign m_signedness = Positive; + }; + +/* +* Arithmetic Operators +*/ +inline BigInt operator+(const BigInt& x, const BigInt& y) + { + return BigInt::add2(x, y.data(), y.sig_words(), y.sign()); + } + +inline BigInt operator+(const BigInt& x, word y) + { + return BigInt::add2(x, &y, 1, BigInt::Positive); + } + +inline BigInt operator+(word x, const BigInt& y) + { + return y + x; + } + +inline BigInt operator-(const BigInt& x, const BigInt& y) + { + return BigInt::add2(x, y.data(), y.sig_words(), y.reverse_sign()); + } + +inline BigInt operator-(const BigInt& x, word y) + { + return BigInt::add2(x, &y, 1, BigInt::Negative); + } + +BigInt BOTAN_PUBLIC_API(2,0) operator*(const BigInt& x, const BigInt& y); +BigInt BOTAN_PUBLIC_API(2,8) operator*(const BigInt& x, word y); +inline BigInt operator*(word x, const BigInt& y) { return y*x; } + +BigInt BOTAN_PUBLIC_API(2,0) operator/(const BigInt& x, const BigInt& d); +BigInt BOTAN_PUBLIC_API(2,0) operator/(const BigInt& x, word m); +BigInt BOTAN_PUBLIC_API(2,0) operator%(const BigInt& x, const BigInt& m); +word BOTAN_PUBLIC_API(2,0) operator%(const BigInt& x, word m); +BigInt BOTAN_PUBLIC_API(2,0) operator<<(const BigInt& x, size_t n); +BigInt BOTAN_PUBLIC_API(2,0) operator>>(const BigInt& x, size_t n); + +/* +* Comparison Operators +*/ +inline bool operator==(const BigInt& a, const BigInt& b) + { return a.is_equal(b); } +inline bool operator!=(const BigInt& a, const BigInt& b) + { return !a.is_equal(b); } +inline bool operator<=(const BigInt& a, const BigInt& b) + { return (a.cmp(b) <= 0); } +inline bool operator>=(const BigInt& a, const BigInt& b) + { return (a.cmp(b) >= 0); } +inline bool operator<(const BigInt& a, const BigInt& b) + { return a.is_less_than(b); } +inline bool operator>(const BigInt& a, const BigInt& b) + { return b.is_less_than(a); } + +inline bool operator==(const BigInt& a, word b) + { return (a.cmp_word(b) == 0); } +inline bool operator!=(const BigInt& a, word b) + { return (a.cmp_word(b) != 0); } +inline bool operator<=(const BigInt& a, word b) + { return (a.cmp_word(b) <= 0); } +inline bool operator>=(const BigInt& a, word b) + { return (a.cmp_word(b) >= 0); } +inline bool operator<(const BigInt& a, word b) + { return (a.cmp_word(b) < 0); } +inline bool operator>(const BigInt& a, word b) + { return (a.cmp_word(b) > 0); } + +/* +* I/O Operators +*/ +BOTAN_PUBLIC_API(2,0) std::ostream& operator<<(std::ostream&, const BigInt&); +BOTAN_PUBLIC_API(2,0) std::istream& operator>>(std::istream&, BigInt&); + +} + +namespace std { + +template<> +inline void swap<Botan::BigInt>(Botan::BigInt& x, Botan::BigInt& y) + { + x.swap(y); + } + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/bigint/divide.cpp b/comm/third_party/botan/src/lib/math/bigint/divide.cpp new file mode 100644 index 0000000000..0b23e2489e --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/divide.cpp @@ -0,0 +1,236 @@ +/* +* Division Algorithm +* (C) 1999-2007,2012,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/divide.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/mp_madd.h> +#include <botan/internal/ct_utils.h> +#include <botan/internal/bit_ops.h> + +namespace Botan { + +namespace { + +/* +* Handle signed operands, if necessary +*/ +void sign_fixup(const BigInt& x, const BigInt& y, BigInt& q, BigInt& r) + { + q.cond_flip_sign(x.sign() != y.sign()); + + if(x.is_negative() && r.is_nonzero()) + { + q -= 1; + r = y.abs() - r; + } + } + +inline bool division_check(word q, word y2, word y1, + word x3, word x2, word x1) + { + /* + Compute (y3,y2,y1) = (y2,y1) * q + and return true if (y3,y2,y1) > (x3,x2,x1) + */ + + word y3 = 0; + y1 = word_madd2(q, y1, &y3); + y2 = word_madd2(q, y2, &y3); + + const word x[3] = { x1, x2, x3 }; + const word y[3] = { y1, y2, y3 }; + + return bigint_ct_is_lt(x, 3, y, 3).is_set(); + } + +} + +void ct_divide(const BigInt& x, const BigInt& y, BigInt& q_out, BigInt& r_out) + { + const size_t x_words = x.sig_words(); + const size_t y_words = y.sig_words(); + + const size_t x_bits = x.bits(); + + BigInt q(BigInt::Positive, x_words); + BigInt r(BigInt::Positive, y_words); + BigInt t(BigInt::Positive, y_words); // a temporary + + for(size_t i = 0; i != x_bits; ++i) + { + const size_t b = x_bits - 1 - i; + const bool x_b = x.get_bit(b); + + r *= 2; + r.conditionally_set_bit(0, x_b); + + const bool r_gte_y = bigint_sub3(t.mutable_data(), r.data(), r.size(), y.data(), y_words) == 0; + + q.conditionally_set_bit(b, r_gte_y); + r.ct_cond_swap(r_gte_y, t); + } + + sign_fixup(x, y, q, r); + r_out = r; + q_out = q; + } + +void ct_divide_u8(const BigInt& x, uint8_t y, BigInt& q_out, uint8_t& r_out) + { + const size_t x_words = x.sig_words(); + const size_t x_bits = x.bits(); + + BigInt q(BigInt::Positive, x_words); + uint32_t r = 0; + + for(size_t i = 0; i != x_bits; ++i) + { + const size_t b = x_bits - 1 - i; + const bool x_b = x.get_bit(b); + + r *= 2; + r += x_b; + + const auto r_gte_y = CT::Mask<uint32_t>::is_gte(r, y); + + q.conditionally_set_bit(b, r_gte_y.is_set()); + r = r_gte_y.select(r - y, r); + } + + if(x.is_negative()) + { + q.flip_sign(); + if(r != 0) + { + --q; + r = y - r; + } + } + + r_out = static_cast<uint8_t>(r); + q_out = q; + } + +BigInt ct_modulo(const BigInt& x, const BigInt& y) + { + if(y.is_negative() || y.is_zero()) + throw Invalid_Argument("ct_modulo requires y > 0"); + + const size_t y_words = y.sig_words(); + + const size_t x_bits = x.bits(); + + BigInt r(BigInt::Positive, y_words); + BigInt t(BigInt::Positive, y_words); + + for(size_t i = 0; i != x_bits; ++i) + { + const size_t b = x_bits - 1 - i; + const bool x_b = x.get_bit(b); + + r *= 2; + r.conditionally_set_bit(0, x_b); + + const bool r_gte_y = bigint_sub3(t.mutable_data(), r.data(), r.size(), y.data(), y_words) == 0; + + r.ct_cond_swap(r_gte_y, t); + } + + if(x.is_negative()) + { + if(r.is_nonzero()) + { + r = y - r; + } + } + + return r; + } + +/* +* Solve x = q * y + r +* +* See Handbook of Applied Cryptography section 14.2.5 +*/ +void vartime_divide(const BigInt& x, const BigInt& y_arg, BigInt& q_out, BigInt& r_out) + { + if(y_arg.is_zero()) + throw BigInt::DivideByZero(); + + const size_t y_words = y_arg.sig_words(); + + BOTAN_ASSERT_NOMSG(y_words > 0); + + BigInt y = y_arg; + + BigInt r = x; + BigInt q = 0; + secure_vector<word> ws; + + r.set_sign(BigInt::Positive); + y.set_sign(BigInt::Positive); + + // Calculate shifts needed to normalize y with high bit set + const size_t shifts = y.top_bits_free(); + + y <<= shifts; + r <<= shifts; + + // we know y has not changed size, since we only shifted up to set high bit + const size_t t = y_words - 1; + const size_t n = std::max(y_words, r.sig_words()) - 1; // r may have changed size however + + BOTAN_ASSERT_NOMSG(n >= t); + + q.grow_to(n - t + 1); + + word* q_words = q.mutable_data(); + + BigInt shifted_y = y << (BOTAN_MP_WORD_BITS * (n-t)); + + // Set q_{n-t} to number of times r > shifted_y + q_words[n-t] = r.reduce_below(shifted_y, ws); + + const word y_t0 = y.word_at(t); + const word y_t1 = y.word_at(t-1); + BOTAN_DEBUG_ASSERT((y_t0 >> (BOTAN_MP_WORD_BITS-1)) == 1); + + for(size_t j = n; j != t; --j) + { + const word x_j0 = r.word_at(j); + const word x_j1 = r.word_at(j-1); + const word x_j2 = r.word_at(j-2); + + word qjt = bigint_divop(x_j0, x_j1, y_t0); + + qjt = CT::Mask<word>::is_equal(x_j0, y_t0).select(MP_WORD_MAX, qjt); + + // Per HAC 14.23, this operation is required at most twice + qjt -= division_check(qjt, y_t0, y_t1, x_j0, x_j1, x_j2); + qjt -= division_check(qjt, y_t0, y_t1, x_j0, x_j1, x_j2); + BOTAN_DEBUG_ASSERT(division_check(qjt, y_t0, y_t1, x_j0, x_j1, x_j2) == false); + + shifted_y >>= BOTAN_MP_WORD_BITS; + // Now shifted_y == y << (BOTAN_MP_WORD_BITS * (j-t-1)) + + // TODO this sequence could be better + r -= qjt * shifted_y; + qjt -= r.is_negative(); + r += static_cast<word>(r.is_negative()) * shifted_y; + + q_words[j-t-1] = qjt; + } + + r >>= shifts; + + sign_fixup(x, y_arg, q, r); + + r_out = r; + q_out = q; + } + +} diff --git a/comm/third_party/botan/src/lib/math/bigint/divide.h b/comm/third_party/botan/src/lib/math/bigint/divide.h new file mode 100644 index 0000000000..47141b3e7f --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/divide.h @@ -0,0 +1,101 @@ +/* +* Division +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_DIVISON_ALGORITHM_H_ +#define BOTAN_DIVISON_ALGORITHM_H_ + +#include <botan/bigint.h> + +BOTAN_FUTURE_INTERNAL_HEADER(divide.h) + +namespace Botan { + +/** +* BigInt Division +* @param x an integer +* @param y a non-zero integer +* @param q will be set to x / y +* @param r will be set to x % y +*/ +void BOTAN_UNSTABLE_API vartime_divide(const BigInt& x, + const BigInt& y, + BigInt& q, + BigInt& r); + +/** +* BigInt division, const time variant +* +* This runs with control flow independent of the values of x/y. +* Warning: the loop bounds still leak the sizes of x and y. +* +* @param x an integer +* @param y a non-zero integer +* @param q will be set to x / y +* @param r will be set to x % y +*/ +void BOTAN_PUBLIC_API(2,9) ct_divide(const BigInt& x, + const BigInt& y, + BigInt& q, + BigInt& r); + +inline void divide(const BigInt& x, + const BigInt& y, + BigInt& q, + BigInt& r) + { + ct_divide(x, y, q, r); + } + +/** +* BigInt division, const time variant +* +* This runs with control flow independent of the values of x/y. +* Warning: the loop bounds still leak the sizes of x and y. +* +* @param x an integer +* @param y a non-zero integer +* @return x/y with remainder discarded +*/ +inline BigInt ct_divide(const BigInt& x, const BigInt& y) + { + BigInt q, r; + ct_divide(x, y, q, r); + return q; + } + +/** +* BigInt division, const time variant +* +* This runs with control flow independent of the values of x/y. +* Warning: the loop bounds still leak the sizes of x and y. +* +* @param x an integer +* @param y a non-zero integer +* @param q will be set to x / y +* @param r will be set to x % y +*/ +void BOTAN_PUBLIC_API(2,9) ct_divide_u8(const BigInt& x, + uint8_t y, + BigInt& q, + uint8_t& r); + +/** +* BigInt modulo, const time variant +* +* Using this function is (slightly) cheaper than calling ct_divide and +* using only the remainder. +* +* @param x a non-negative integer +* @param modulo a positive integer +* @return result x % modulo +*/ +BigInt BOTAN_PUBLIC_API(2,9) ct_modulo(const BigInt& x, + const BigInt& modulo); + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/bigint/info.txt b/comm/third_party/botan/src/lib/math/bigint/info.txt new file mode 100644 index 0000000000..974366b810 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/bigint/info.txt @@ -0,0 +1,14 @@ +<defines> +BIGINT -> 20131128 +</defines> + +<header:public> +bigint.h +divide.h +</header:public> + +<requires> +mp +hex +rng +</requires> diff --git a/comm/third_party/botan/src/lib/math/mp/info.txt b/comm/third_party/botan/src/lib/math/mp/info.txt new file mode 100644 index 0000000000..cee4325ed8 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/info.txt @@ -0,0 +1,10 @@ +<defines> +BIGINT_MP -> 20151225 +</defines> + +<header:internal> +mp_core.h +mp_madd.h +mp_asmi.h +mp_monty.h +</header:internal> diff --git a/comm/third_party/botan/src/lib/math/mp/mp_asmi.h b/comm/third_party/botan/src/lib/math/mp/mp_asmi.h new file mode 100644 index 0000000000..e1518d51c7 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_asmi.h @@ -0,0 +1,611 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2010 Jack Lloyd +* 2006 Luca Piccarreta +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_ASM_INTERNAL_H_ +#define BOTAN_MP_ASM_INTERNAL_H_ + +#include <botan/internal/mp_madd.h> + +namespace Botan { + +#if defined(BOTAN_MP_USE_X86_32_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ + ASM("movl %[carry], 4*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movl 4*" #INDEX "(%[x]),%%eax") \ + ASM("mull %[y]") \ + ASM("addl %[carry],%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("addl 4*" #INDEX "(%[z]),%%eax") \ + ASM("adcl $0,%%edx") \ + ASM("movl %%edx,%[carry]") \ + ASM("movl %%eax, 4*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorl %[carry]") \ + CORE_CODE \ + ASM("sbbl %[carry],%[carry]") \ + ASM("negl %[carry]") + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + +#define ADDSUB2_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ + ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \ + +#define ADDSUB3_OP(OPERATION, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ + ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ + ASM("movq %[carry], 8*" #INDEX "(%[z])") \ + +#define LINMUL_OP(WRITE_TO, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") + +#define MULADD_OP(IGNORED, INDEX) \ + ASM("movq 8*" #INDEX "(%[x]),%%rax") \ + ASM("mulq %[y]") \ + ASM("addq %[carry],%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("addq 8*" #INDEX "(%[z]),%%rax") \ + ASM("adcq $0,%%rdx") \ + ASM("movq %%rdx,%[carry]") \ + ASM("movq %%rax, 8*" #INDEX " (%[z])") + +#define ADD_OR_SUBTRACT(CORE_CODE) \ + ASM("rorq %[carry]") \ + CORE_CODE \ + ASM("sbbq %[carry],%[carry]") \ + ASM("negq %[carry]") + +#endif + +#if defined(ADD_OR_SUBTRACT) + +#define ASM(x) x "\n\t" + +#define DO_8_TIMES(MACRO, ARG) \ + MACRO(ARG, 0) \ + MACRO(ARG, 1) \ + MACRO(ARG, 2) \ + MACRO(ARG, 3) \ + MACRO(ARG, 4) \ + MACRO(ARG, 5) \ + MACRO(ARG, 6) \ + MACRO(ARG, 7) + +#endif + +/* +* Word Addition +*/ +inline word word_add(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word z = x + y; + word c1 = (z < x); + z += *carry; + *carry = c1 | (z < *carry); + return z; +#endif + } + +/* +* Eight Word Block Addition, Two Argument +*/ +inline word word8_add2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_add(x[0], y[0], &carry); + x[1] = word_add(x[1], y[1], &carry); + x[2] = word_add(x[2], y[2], &carry); + x[3] = word_add(x[3], y[3], &carry); + x[4] = word_add(x[4], y[4], &carry); + x[5] = word_add(x[5], y[5], &carry); + x[6] = word_add(x[6], y[6], &carry); + x[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Addition, Three Argument +*/ +inline word word8_add3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#else + z[0] = word_add(x[0], y[0], &carry); + z[1] = word_add(x[1], y[1], &carry); + z[2] = word_add(x[2], y[2], &carry); + z[3] = word_add(x[3], y[3], &carry); + z[4] = word_add(x[4], y[4], &carry); + z[5] = word_add(x[5], y[5], &carry); + z[6] = word_add(x[6], y[6], &carry); + z[7] = word_add(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Word Subtraction +*/ +inline word word_sub(word x, word y, word* carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) + : [x]"=r"(x), [carry]"=r"(*carry) + : "0"(x), [y]"rm"(y), "1"(*carry) + : "cc"); + return x; + +#else + word t0 = x - y; + word c1 = (t0 > x); + word z = t0 - *carry; + *carry = c1 | (z > t0); + return z; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_sub(x[0], y[0], &carry); + x[1] = word_sub(x[1], y[1], &carry); + x[2] = word_sub(x[2], y[2], &carry); + x[3] = word_sub(x[3], y[3], &carry); + x[4] = word_sub(x[4], y[4], &carry); + x[5] = word_sub(x[5], y[5], &carry); + x[6] = word_sub(x[6], y[6], &carry); + x[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Two Argument +*/ +inline word word8_sub2_rev(word x[8], const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) + : "cc", "memory"); + return carry; + +#else + x[0] = word_sub(y[0], x[0], &carry); + x[1] = word_sub(y[1], x[1], &carry); + x[2] = word_sub(y[2], x[2], &carry); + x[3] = word_sub(y[3], x[3], &carry); + x[4] = word_sub(y[4], x[4], &carry); + x[5] = word_sub(y[5], x[5], &carry); + x[6] = word_sub(y[6], x[6], &carry); + x[7] = word_sub(y[7], x[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Subtraction, Three Argument +*/ +inline word word8_sub3(word z[8], const word x[8], + const word y[8], word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) + : [carry]"=r"(carry) + : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) + : "cc", "memory"); + return carry; + +#else + z[0] = word_sub(x[0], y[0], &carry); + z[1] = word_sub(x[1], y[1], &carry); + z[2] = word_sub(x[2], y[2], &carry); + z[3] = word_sub(x[3], y[3], &carry); + z[4] = word_sub(x[4], y[4], &carry); + z[5] = word_sub(x[5], y[5], &carry); + z[6] = word_sub(x[6], y[6], &carry); + z[7] = word_sub(x[7], y[7], &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul2(word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(LINMUL_OP, "x") + : [carry]"=r"(carry) + : [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + x[0] = word_madd2(x[0], y, &carry); + x[1] = word_madd2(x[1], y, &carry); + x[2] = word_madd2(x[2], y, &carry); + x[3] = word_madd2(x[3], y, &carry); + x[4] = word_madd2(x[4], y, &carry); + x[5] = word_madd2(x[5], y, &carry); + x[6] = word_madd2(x[6], y, &carry); + x[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Linear Multiplication +*/ +inline word word8_linmul3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm( + DO_8_TIMES(LINMUL_OP, "z") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + z[0] = word_madd2(x[0], y, &carry); + z[1] = word_madd2(x[1], y, &carry); + z[2] = word_madd2(x[2], y, &carry); + z[3] = word_madd2(x[3], y, &carry); + z[4] = word_madd2(x[4], y, &carry); + z[5] = word_madd2(x[5], y, &carry); + z[6] = word_madd2(x[6], y, &carry); + z[7] = word_madd2(x[7], y, &carry); + return carry; +#endif + } + +/* +* Eight Word Block Multiply/Add +*/ +inline word word8_madd3(word z[8], const word x[8], word y, word carry) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%eax", "%edx"); + return carry; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm( + DO_8_TIMES(MULADD_OP, "") + : [carry]"=r"(carry) + : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) + : "cc", "%rax", "%rdx"); + return carry; + +#else + z[0] = word_madd3(x[0], y, z[0], &carry); + z[1] = word_madd3(x[1], y, z[1], &carry); + z[2] = word_madd3(x[2], y, z[2], &carry); + z[3] = word_madd3(x[3], y, z[3], &carry); + z[4] = word_madd3(x[4], y, z[4], &carry); + z[5] = word_madd3(x[5], y, z[5], &carry); + z[6] = word_madd3(x[6], y, z[6], &carry); + z[7] = word_madd3(x[7], y, z[7], &carry); + return carry; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += x * y +*/ +inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + word z0 = 0, z1 = 0; + + asm("mull %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addl %[z0],%[w0] + adcl %[z1],%[w1] + adcl $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + word z0 = 0, z1 = 0; + + asm("mulq %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addq %[z0],%[w0] + adcq %[z1],%[w1] + adcq $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = *w0; + *w0 = word_madd2(x, y, &carry); + *w1 += carry; + *w2 += (*w1 < carry); +#endif + } + +/* +* 3-word addition +* (w2,w1,w0) += x +*/ +inline void word3_add(word* w2, word* w1, word* w0, word x) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm(R"( + addl %[x],%[w0] + adcl $0,%[w1] + adcl $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + asm(R"( + addq %[x],%[w0] + adcq $0,%[w1] + adcq $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + *w0 += x; + word c1 = (*w0 < x); + *w1 += c1; + word c2 = (*w1 < c1); + *w2 += c2; +#endif + } + +/* +* Multiply-Add Accumulator +* (w2,w1,w0) += 2 * x * y +*/ +inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + + word z0 = 0, z1 = 0; + + asm("mull %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addl %[z0],%[w0] + adcl %[z1],%[w1] + adcl $0,%[w2] + + addl %[z0],%[w0] + adcl %[z1],%[w1] + adcl $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + + word z0 = 0, z1 = 0; + + asm("mulq %[y]" + : "=a"(z0),"=d"(z1) + : "a"(x), [y]"rm"(y) + : "cc"); + + asm(R"( + addq %[z0],%[w0] + adcq %[z1],%[w1] + adcq $0,%[w2] + + addq %[z0],%[w0] + adcq %[z1],%[w1] + adcq $0,%[w2] + )" + : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) + : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) + : "cc"); + +#else + word carry = 0; + x = word_madd2(x, y, &carry); + y = carry; + + word top = (y >> (BOTAN_MP_WORD_BITS-1)); + y <<= 1; + y |= (x >> (BOTAN_MP_WORD_BITS-1)); + x <<= 1; + + carry = 0; + *w0 = word_add(*w0, x, &carry); + *w1 = word_add(*w1, y, &carry); + *w2 = word_add(*w2, top, &carry); +#endif + } + +#if defined(ASM) + #undef ASM + #undef DO_8_TIMES + #undef ADD_OR_SUBTRACT + #undef ADDSUB2_OP + #undef ADDSUB3_OP + #undef LINMUL_OP + #undef MULADD_OP +#endif + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/mp/mp_comba.cpp b/comm/third_party/botan/src/lib/math/mp/mp_comba.cpp new file mode 100644 index 0000000000..ec527224c8 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_comba.cpp @@ -0,0 +1,2211 @@ +/* +* Comba Multiplication and Squaring +* +* This file was automatically generated by ./src/scripts/comba.py on 2018-05-08 +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/internal/mp_core.h> +#include <botan/internal/mp_asmi.h> + +namespace Botan { + +/* +* Comba 4x4 Squaring +*/ +void bigint_comba_sqr4(word z[8], const word x[4]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd (&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd (&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; + z[ 7] = w1; + } + +/* +* Comba 4x4 Multiplication +*/ +void bigint_comba_mul4(word z[8], const word x[4], const word y[4]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + z[ 6] = w0; + z[ 7] = w1; + } + +/* +* Comba 6x6 Squaring +*/ +void bigint_comba_sqr6(word z[12], const word x[6]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd (&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd (&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd (&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]); + z[ 7] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]); + word3_muladd (&w1, &w0, &w2, x[ 4], x[ 4]); + z[ 8] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = 0; + + word3_muladd (&w0, &w2, &w1, x[ 5], x[ 5]); + z[10] = w1; + z[11] = w2; + } + +/* +* Comba 6x6 Multiplication +*/ +void bigint_comba_mul6(word z[12], const word x[6], const word y[6]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + z[ 6] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); + z[ 7] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); + z[ 8] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + z[ 9] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); + z[10] = w1; + z[11] = w2; + } + +/* +* Comba 8x8 Squaring +*/ +void bigint_comba_sqr8(word z[16], const word x[8]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd (&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd (&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd (&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]); + z[ 7] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]); + word3_muladd (&w1, &w0, &w2, x[ 4], x[ 4]); + z[ 8] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]); + word3_muladd (&w0, &w2, &w1, x[ 5], x[ 5]); + z[10] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]); + z[11] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); + word3_muladd (&w2, &w1, &w0, x[ 6], x[ 6]); + z[12] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]); + z[13] = w1; w1 = 0; + + word3_muladd (&w1, &w0, &w2, x[ 7], x[ 7]); + z[14] = w2; + z[15] = w0; + } + +/* +* Comba 8x8 Multiplication +*/ +void bigint_comba_mul8(word z[16], const word x[8], const word y[8]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); + z[ 6] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); + z[ 7] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); + z[ 8] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); + z[ 9] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); + z[10] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); + z[11] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); + z[12] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); + z[13] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); + z[14] = w2; + z[15] = w0; + } + +/* +* Comba 9x9 Squaring +*/ +void bigint_comba_sqr9(word z[18], const word x[9]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd (&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd (&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd (&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]); + z[ 7] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]); + word3_muladd (&w1, &w0, &w2, x[ 4], x[ 4]); + z[ 8] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]); + word3_muladd (&w0, &w2, &w1, x[ 5], x[ 5]); + z[10] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]); + z[11] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); + word3_muladd (&w2, &w1, &w0, x[ 6], x[ 6]); + z[12] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]); + z[13] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]); + word3_muladd (&w1, &w0, &w2, x[ 7], x[ 7]); + z[14] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); + z[15] = w0; w0 = 0; + + word3_muladd (&w0, &w2, &w1, x[ 8], x[ 8]); + z[16] = w1; + z[17] = w2; + } + +/* +* Comba 9x9 Multiplication +*/ +void bigint_comba_mul9(word z[18], const word x[9], const word y[9]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); + z[ 6] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); + z[ 7] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]); + z[ 8] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); + z[ 9] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]); + z[10] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]); + z[11] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); + z[12] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]); + z[13] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]); + z[14] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); + z[15] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]); + z[16] = w1; + z[17] = w2; + } + +/* +* Comba 16x16 Squaring +*/ +void bigint_comba_sqr16(word z[32], const word x[16]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd (&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd (&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd (&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]); + z[ 7] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]); + word3_muladd (&w1, &w0, &w2, x[ 4], x[ 4]); + z[ 8] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[10]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 9]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]); + word3_muladd (&w0, &w2, &w1, x[ 5], x[ 5]); + z[10] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[11]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[10]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 9]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]); + z[11] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); + word3_muladd (&w2, &w1, &w0, x[ 6], x[ 6]); + z[12] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[12]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[11]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[10]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 9]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]); + z[13] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[12]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[11]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[10]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 9]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]); + word3_muladd (&w1, &w0, &w2, x[ 7], x[ 7]); + z[14] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); + z[15] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[12]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[11]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[10]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[ 9]); + word3_muladd (&w0, &w2, &w1, x[ 8], x[ 8]); + z[16] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[12]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[11]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[10]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[ 9]); + z[17] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]); + word3_muladd (&w2, &w1, &w0, x[ 9], x[ 9]); + z[18] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[12]); + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[11]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[10]); + z[19] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[12]); + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[11]); + word3_muladd (&w1, &w0, &w2, x[10], x[10]); + z[20] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[11]); + z[21] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[10], x[12]); + word3_muladd (&w0, &w2, &w1, x[11], x[11]); + z[22] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[10], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[11], x[12]); + z[23] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[13]); + word3_muladd (&w2, &w1, &w0, x[12], x[12]); + z[24] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[10], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[11], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[12], x[13]); + z[25] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[11], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[12], x[14]); + word3_muladd (&w1, &w0, &w2, x[13], x[13]); + z[26] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[12], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[13], x[14]); + z[27] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[13], x[15]); + word3_muladd (&w0, &w2, &w1, x[14], x[14]); + z[28] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[14], x[15]); + z[29] = w2; w2 = 0; + + word3_muladd (&w2, &w1, &w0, x[15], x[15]); + z[30] = w0; + z[31] = w1; + } + +/* +* Comba 16x16 Multiplication +*/ +void bigint_comba_mul16(word z[32], const word x[16], const word y[16]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); + z[ 6] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); + z[ 7] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]); + z[ 8] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]); + z[ 9] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[10]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 0]); + z[10] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[11]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[10]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[10], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 0]); + z[11] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 0]); + z[12] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[13]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[12]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[11]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[10]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[11], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[12], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 0]); + z[13] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[14]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[13]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[12]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[11]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[10]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[10], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[12], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[13], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 0]); + z[14] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 0]); + z[15] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 1], y[15]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[14]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[13]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[12]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[11]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[10]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[11], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[12], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[14], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[15], y[ 1]); + z[16] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 2], y[15]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[14]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[13]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[12]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[11]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[10]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[10], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[12], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[13], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[15], y[ 2]); + z[17] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 3], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 3]); + z[18] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 4], y[15]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[14]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[13]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[12]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[11]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[10]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[11], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[12], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[14], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[15], y[ 4]); + z[19] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 5], y[15]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[14]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[13]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[12]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[11]); + word3_muladd(&w1, &w0, &w2, x[10], y[10]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[12], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[13], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[15], y[ 5]); + z[20] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 6], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[12]); + word3_muladd(&w2, &w1, &w0, x[10], y[11]); + word3_muladd(&w2, &w1, &w0, x[11], y[10]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 6]); + z[21] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 7], y[15]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[14]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[13]); + word3_muladd(&w0, &w2, &w1, x[10], y[12]); + word3_muladd(&w0, &w2, &w1, x[11], y[11]); + word3_muladd(&w0, &w2, &w1, x[12], y[10]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[14], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[15], y[ 7]); + z[22] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 8], y[15]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[14]); + word3_muladd(&w1, &w0, &w2, x[10], y[13]); + word3_muladd(&w1, &w0, &w2, x[11], y[12]); + word3_muladd(&w1, &w0, &w2, x[12], y[11]); + word3_muladd(&w1, &w0, &w2, x[13], y[10]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[15], y[ 8]); + z[23] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 9], y[15]); + word3_muladd(&w2, &w1, &w0, x[10], y[14]); + word3_muladd(&w2, &w1, &w0, x[11], y[13]); + word3_muladd(&w2, &w1, &w0, x[12], y[12]); + word3_muladd(&w2, &w1, &w0, x[13], y[11]); + word3_muladd(&w2, &w1, &w0, x[14], y[10]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 9]); + z[24] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[10], y[15]); + word3_muladd(&w0, &w2, &w1, x[11], y[14]); + word3_muladd(&w0, &w2, &w1, x[12], y[13]); + word3_muladd(&w0, &w2, &w1, x[13], y[12]); + word3_muladd(&w0, &w2, &w1, x[14], y[11]); + word3_muladd(&w0, &w2, &w1, x[15], y[10]); + z[25] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[11], y[15]); + word3_muladd(&w1, &w0, &w2, x[12], y[14]); + word3_muladd(&w1, &w0, &w2, x[13], y[13]); + word3_muladd(&w1, &w0, &w2, x[14], y[12]); + word3_muladd(&w1, &w0, &w2, x[15], y[11]); + z[26] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[12], y[15]); + word3_muladd(&w2, &w1, &w0, x[13], y[14]); + word3_muladd(&w2, &w1, &w0, x[14], y[13]); + word3_muladd(&w2, &w1, &w0, x[15], y[12]); + z[27] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[13], y[15]); + word3_muladd(&w0, &w2, &w1, x[14], y[14]); + word3_muladd(&w0, &w2, &w1, x[15], y[13]); + z[28] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[14], y[15]); + word3_muladd(&w1, &w0, &w2, x[15], y[14]); + z[29] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[15], y[15]); + z[30] = w0; + z[31] = w1; + } + +/* +* Comba 24x24 Squaring +*/ +void bigint_comba_sqr24(word z[48], const word x[24]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd (&w2, &w1, &w0, x[ 0], x[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]); + z[ 1] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]); + word3_muladd (&w1, &w0, &w2, x[ 1], x[ 1]); + z[ 2] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]); + z[ 3] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]); + word3_muladd (&w0, &w2, &w1, x[ 2], x[ 2]); + z[ 4] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]); + z[ 5] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]); + word3_muladd (&w2, &w1, &w0, x[ 3], x[ 3]); + z[ 6] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]); + z[ 7] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]); + word3_muladd (&w1, &w0, &w2, x[ 4], x[ 4]); + z[ 8] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]); + z[ 9] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[10]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 9]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]); + word3_muladd (&w0, &w2, &w1, x[ 5], x[ 5]); + z[10] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[11]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[10]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 9]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]); + z[11] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]); + word3_muladd (&w2, &w1, &w0, x[ 6], x[ 6]); + z[12] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[12]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[11]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[10]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 9]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]); + z[13] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[12]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[11]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[10]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 9]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]); + word3_muladd (&w1, &w0, &w2, x[ 7], x[ 7]); + z[14] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]); + z[15] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[16]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[12]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[11]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[10]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[ 9]); + word3_muladd (&w0, &w2, &w1, x[ 8], x[ 8]); + z[16] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[17]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[16]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[12]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[11]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[10]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[ 9]); + z[17] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[18]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[17]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[16]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]); + word3_muladd (&w2, &w1, &w0, x[ 9], x[ 9]); + z[18] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[19]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[18]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[17]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[16]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[12]); + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[11]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[10]); + z[19] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[20]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[19]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[18]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[17]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[16]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[12]); + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[11]); + word3_muladd (&w1, &w0, &w2, x[10], x[10]); + z[20] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 0], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[20]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[19]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[18]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[17]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[16]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[11]); + z[21] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 0], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[ 1], x[21]); + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[20]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[19]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[18]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[17]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[16]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[13]); + word3_muladd_2(&w0, &w2, &w1, x[10], x[12]); + word3_muladd (&w0, &w2, &w1, x[11], x[11]); + z[22] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 0], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[ 1], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[ 2], x[21]); + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[20]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[19]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[18]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[17]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[16]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[14]); + word3_muladd_2(&w1, &w0, &w2, x[10], x[13]); + word3_muladd_2(&w1, &w0, &w2, x[11], x[12]); + z[23] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 1], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[ 2], x[22]); + word3_muladd_2(&w2, &w1, &w0, x[ 3], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[20]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[19]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[18]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[17]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[16]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[14]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[13]); + word3_muladd (&w2, &w1, &w0, x[12], x[12]); + z[24] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 2], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[ 3], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[ 4], x[21]); + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[20]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[19]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[18]); + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[17]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[16]); + word3_muladd_2(&w0, &w2, &w1, x[10], x[15]); + word3_muladd_2(&w0, &w2, &w1, x[11], x[14]); + word3_muladd_2(&w0, &w2, &w1, x[12], x[13]); + z[25] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 3], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[ 4], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[ 5], x[21]); + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[20]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[19]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[18]); + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[17]); + word3_muladd_2(&w1, &w0, &w2, x[10], x[16]); + word3_muladd_2(&w1, &w0, &w2, x[11], x[15]); + word3_muladd_2(&w1, &w0, &w2, x[12], x[14]); + word3_muladd (&w1, &w0, &w2, x[13], x[13]); + z[26] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 4], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[ 5], x[22]); + word3_muladd_2(&w2, &w1, &w0, x[ 6], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[20]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[19]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[18]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[17]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[16]); + word3_muladd_2(&w2, &w1, &w0, x[12], x[15]); + word3_muladd_2(&w2, &w1, &w0, x[13], x[14]); + z[27] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 5], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[ 6], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[ 7], x[21]); + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[20]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[19]); + word3_muladd_2(&w0, &w2, &w1, x[10], x[18]); + word3_muladd_2(&w0, &w2, &w1, x[11], x[17]); + word3_muladd_2(&w0, &w2, &w1, x[12], x[16]); + word3_muladd_2(&w0, &w2, &w1, x[13], x[15]); + word3_muladd (&w0, &w2, &w1, x[14], x[14]); + z[28] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 6], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[ 7], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[ 8], x[21]); + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[20]); + word3_muladd_2(&w1, &w0, &w2, x[10], x[19]); + word3_muladd_2(&w1, &w0, &w2, x[11], x[18]); + word3_muladd_2(&w1, &w0, &w2, x[12], x[17]); + word3_muladd_2(&w1, &w0, &w2, x[13], x[16]); + word3_muladd_2(&w1, &w0, &w2, x[14], x[15]); + z[29] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[ 7], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[ 8], x[22]); + word3_muladd_2(&w2, &w1, &w0, x[ 9], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[10], x[20]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[19]); + word3_muladd_2(&w2, &w1, &w0, x[12], x[18]); + word3_muladd_2(&w2, &w1, &w0, x[13], x[17]); + word3_muladd_2(&w2, &w1, &w0, x[14], x[16]); + word3_muladd (&w2, &w1, &w0, x[15], x[15]); + z[30] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[ 8], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[ 9], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[10], x[21]); + word3_muladd_2(&w0, &w2, &w1, x[11], x[20]); + word3_muladd_2(&w0, &w2, &w1, x[12], x[19]); + word3_muladd_2(&w0, &w2, &w1, x[13], x[18]); + word3_muladd_2(&w0, &w2, &w1, x[14], x[17]); + word3_muladd_2(&w0, &w2, &w1, x[15], x[16]); + z[31] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[ 9], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[10], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[11], x[21]); + word3_muladd_2(&w1, &w0, &w2, x[12], x[20]); + word3_muladd_2(&w1, &w0, &w2, x[13], x[19]); + word3_muladd_2(&w1, &w0, &w2, x[14], x[18]); + word3_muladd_2(&w1, &w0, &w2, x[15], x[17]); + word3_muladd (&w1, &w0, &w2, x[16], x[16]); + z[32] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[10], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[11], x[22]); + word3_muladd_2(&w2, &w1, &w0, x[12], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[13], x[20]); + word3_muladd_2(&w2, &w1, &w0, x[14], x[19]); + word3_muladd_2(&w2, &w1, &w0, x[15], x[18]); + word3_muladd_2(&w2, &w1, &w0, x[16], x[17]); + z[33] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[11], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[12], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[13], x[21]); + word3_muladd_2(&w0, &w2, &w1, x[14], x[20]); + word3_muladd_2(&w0, &w2, &w1, x[15], x[19]); + word3_muladd_2(&w0, &w2, &w1, x[16], x[18]); + word3_muladd (&w0, &w2, &w1, x[17], x[17]); + z[34] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[12], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[13], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[14], x[21]); + word3_muladd_2(&w1, &w0, &w2, x[15], x[20]); + word3_muladd_2(&w1, &w0, &w2, x[16], x[19]); + word3_muladd_2(&w1, &w0, &w2, x[17], x[18]); + z[35] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[13], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[14], x[22]); + word3_muladd_2(&w2, &w1, &w0, x[15], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[16], x[20]); + word3_muladd_2(&w2, &w1, &w0, x[17], x[19]); + word3_muladd (&w2, &w1, &w0, x[18], x[18]); + z[36] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[14], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[15], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[16], x[21]); + word3_muladd_2(&w0, &w2, &w1, x[17], x[20]); + word3_muladd_2(&w0, &w2, &w1, x[18], x[19]); + z[37] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[15], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[16], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[17], x[21]); + word3_muladd_2(&w1, &w0, &w2, x[18], x[20]); + word3_muladd (&w1, &w0, &w2, x[19], x[19]); + z[38] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[16], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[17], x[22]); + word3_muladd_2(&w2, &w1, &w0, x[18], x[21]); + word3_muladd_2(&w2, &w1, &w0, x[19], x[20]); + z[39] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[17], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[18], x[22]); + word3_muladd_2(&w0, &w2, &w1, x[19], x[21]); + word3_muladd (&w0, &w2, &w1, x[20], x[20]); + z[40] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[18], x[23]); + word3_muladd_2(&w1, &w0, &w2, x[19], x[22]); + word3_muladd_2(&w1, &w0, &w2, x[20], x[21]); + z[41] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[19], x[23]); + word3_muladd_2(&w2, &w1, &w0, x[20], x[22]); + word3_muladd (&w2, &w1, &w0, x[21], x[21]); + z[42] = w0; w0 = 0; + + word3_muladd_2(&w0, &w2, &w1, x[20], x[23]); + word3_muladd_2(&w0, &w2, &w1, x[21], x[22]); + z[43] = w1; w1 = 0; + + word3_muladd_2(&w1, &w0, &w2, x[21], x[23]); + word3_muladd (&w1, &w0, &w2, x[22], x[22]); + z[44] = w2; w2 = 0; + + word3_muladd_2(&w2, &w1, &w0, x[22], x[23]); + z[45] = w0; w0 = 0; + + word3_muladd (&w0, &w2, &w1, x[23], x[23]); + z[46] = w1; + z[47] = w2; + } + +/* +* Comba 24x24 Multiplication +*/ +void bigint_comba_mul24(word z[48], const word x[24], const word y[24]) + { + word w2 = 0, w1 = 0, w0 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); + z[ 0] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); + z[ 1] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); + z[ 2] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); + z[ 3] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); + z[ 4] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); + z[ 5] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); + z[ 6] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); + z[ 7] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]); + z[ 8] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]); + z[ 9] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[10]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 0]); + z[10] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[11]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[10]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[10], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 0]); + z[11] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 0]); + z[12] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[13]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[12]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[11]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[10]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[11], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[12], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 0]); + z[13] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[14]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[13]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[12]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[11]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[10]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[10], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[12], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[13], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 0]); + z[14] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 0]); + z[15] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[16]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[15]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[14]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[13]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[12]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[11]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[10]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[11], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[12], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[14], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[15], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[16], y[ 0]); + z[16] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[17]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[16]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[15]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[14]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[13]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[12]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[11]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[10]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[10], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[12], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[13], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[15], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[16], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[17], y[ 0]); + z[17] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[18]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[17]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[16]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[12]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[11]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[10]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[10], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[11], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[16], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[17], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[18], y[ 0]); + z[18] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[19]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[18]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[17]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[16]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[15]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[14]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[13]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[12]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[11]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[10]); + word3_muladd(&w0, &w2, &w1, x[10], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[11], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[12], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[14], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[15], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[16], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[17], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[18], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[19], y[ 0]); + z[19] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[20]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[19]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[18]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[17]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[16]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[15]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[14]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[13]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[12]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[11]); + word3_muladd(&w1, &w0, &w2, x[10], y[10]); + word3_muladd(&w1, &w0, &w2, x[11], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[12], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[13], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[15], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[16], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[17], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[18], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[19], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[20], y[ 0]); + z[20] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 0], y[21]); + word3_muladd(&w2, &w1, &w0, x[ 1], y[20]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[19]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[18]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[17]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[16]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[15]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[14]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[13]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[12]); + word3_muladd(&w2, &w1, &w0, x[10], y[11]); + word3_muladd(&w2, &w1, &w0, x[11], y[10]); + word3_muladd(&w2, &w1, &w0, x[12], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[13], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[14], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[16], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[17], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[18], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[19], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[20], y[ 1]); + word3_muladd(&w2, &w1, &w0, x[21], y[ 0]); + z[21] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 0], y[22]); + word3_muladd(&w0, &w2, &w1, x[ 1], y[21]); + word3_muladd(&w0, &w2, &w1, x[ 2], y[20]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[19]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[18]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[17]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[16]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[15]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[14]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[13]); + word3_muladd(&w0, &w2, &w1, x[10], y[12]); + word3_muladd(&w0, &w2, &w1, x[11], y[11]); + word3_muladd(&w0, &w2, &w1, x[12], y[10]); + word3_muladd(&w0, &w2, &w1, x[13], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[14], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[15], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[16], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[17], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[18], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[19], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[20], y[ 2]); + word3_muladd(&w0, &w2, &w1, x[21], y[ 1]); + word3_muladd(&w0, &w2, &w1, x[22], y[ 0]); + z[22] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 0], y[23]); + word3_muladd(&w1, &w0, &w2, x[ 1], y[22]); + word3_muladd(&w1, &w0, &w2, x[ 2], y[21]); + word3_muladd(&w1, &w0, &w2, x[ 3], y[20]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[19]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[18]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[17]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[16]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[15]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[14]); + word3_muladd(&w1, &w0, &w2, x[10], y[13]); + word3_muladd(&w1, &w0, &w2, x[11], y[12]); + word3_muladd(&w1, &w0, &w2, x[12], y[11]); + word3_muladd(&w1, &w0, &w2, x[13], y[10]); + word3_muladd(&w1, &w0, &w2, x[14], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[15], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[16], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[17], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[18], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[19], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[20], y[ 3]); + word3_muladd(&w1, &w0, &w2, x[21], y[ 2]); + word3_muladd(&w1, &w0, &w2, x[22], y[ 1]); + word3_muladd(&w1, &w0, &w2, x[23], y[ 0]); + z[23] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 1], y[23]); + word3_muladd(&w2, &w1, &w0, x[ 2], y[22]); + word3_muladd(&w2, &w1, &w0, x[ 3], y[21]); + word3_muladd(&w2, &w1, &w0, x[ 4], y[20]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[19]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[18]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[17]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[16]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[15]); + word3_muladd(&w2, &w1, &w0, x[10], y[14]); + word3_muladd(&w2, &w1, &w0, x[11], y[13]); + word3_muladd(&w2, &w1, &w0, x[12], y[12]); + word3_muladd(&w2, &w1, &w0, x[13], y[11]); + word3_muladd(&w2, &w1, &w0, x[14], y[10]); + word3_muladd(&w2, &w1, &w0, x[15], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[16], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[17], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[18], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[19], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[20], y[ 4]); + word3_muladd(&w2, &w1, &w0, x[21], y[ 3]); + word3_muladd(&w2, &w1, &w0, x[22], y[ 2]); + word3_muladd(&w2, &w1, &w0, x[23], y[ 1]); + z[24] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 2], y[23]); + word3_muladd(&w0, &w2, &w1, x[ 3], y[22]); + word3_muladd(&w0, &w2, &w1, x[ 4], y[21]); + word3_muladd(&w0, &w2, &w1, x[ 5], y[20]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[19]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[18]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[17]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[16]); + word3_muladd(&w0, &w2, &w1, x[10], y[15]); + word3_muladd(&w0, &w2, &w1, x[11], y[14]); + word3_muladd(&w0, &w2, &w1, x[12], y[13]); + word3_muladd(&w0, &w2, &w1, x[13], y[12]); + word3_muladd(&w0, &w2, &w1, x[14], y[11]); + word3_muladd(&w0, &w2, &w1, x[15], y[10]); + word3_muladd(&w0, &w2, &w1, x[16], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[17], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[18], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[19], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[20], y[ 5]); + word3_muladd(&w0, &w2, &w1, x[21], y[ 4]); + word3_muladd(&w0, &w2, &w1, x[22], y[ 3]); + word3_muladd(&w0, &w2, &w1, x[23], y[ 2]); + z[25] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 3], y[23]); + word3_muladd(&w1, &w0, &w2, x[ 4], y[22]); + word3_muladd(&w1, &w0, &w2, x[ 5], y[21]); + word3_muladd(&w1, &w0, &w2, x[ 6], y[20]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[19]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[18]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[17]); + word3_muladd(&w1, &w0, &w2, x[10], y[16]); + word3_muladd(&w1, &w0, &w2, x[11], y[15]); + word3_muladd(&w1, &w0, &w2, x[12], y[14]); + word3_muladd(&w1, &w0, &w2, x[13], y[13]); + word3_muladd(&w1, &w0, &w2, x[14], y[12]); + word3_muladd(&w1, &w0, &w2, x[15], y[11]); + word3_muladd(&w1, &w0, &w2, x[16], y[10]); + word3_muladd(&w1, &w0, &w2, x[17], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[18], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[19], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[20], y[ 6]); + word3_muladd(&w1, &w0, &w2, x[21], y[ 5]); + word3_muladd(&w1, &w0, &w2, x[22], y[ 4]); + word3_muladd(&w1, &w0, &w2, x[23], y[ 3]); + z[26] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 4], y[23]); + word3_muladd(&w2, &w1, &w0, x[ 5], y[22]); + word3_muladd(&w2, &w1, &w0, x[ 6], y[21]); + word3_muladd(&w2, &w1, &w0, x[ 7], y[20]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[19]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[18]); + word3_muladd(&w2, &w1, &w0, x[10], y[17]); + word3_muladd(&w2, &w1, &w0, x[11], y[16]); + word3_muladd(&w2, &w1, &w0, x[12], y[15]); + word3_muladd(&w2, &w1, &w0, x[13], y[14]); + word3_muladd(&w2, &w1, &w0, x[14], y[13]); + word3_muladd(&w2, &w1, &w0, x[15], y[12]); + word3_muladd(&w2, &w1, &w0, x[16], y[11]); + word3_muladd(&w2, &w1, &w0, x[17], y[10]); + word3_muladd(&w2, &w1, &w0, x[18], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[19], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[20], y[ 7]); + word3_muladd(&w2, &w1, &w0, x[21], y[ 6]); + word3_muladd(&w2, &w1, &w0, x[22], y[ 5]); + word3_muladd(&w2, &w1, &w0, x[23], y[ 4]); + z[27] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 5], y[23]); + word3_muladd(&w0, &w2, &w1, x[ 6], y[22]); + word3_muladd(&w0, &w2, &w1, x[ 7], y[21]); + word3_muladd(&w0, &w2, &w1, x[ 8], y[20]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[19]); + word3_muladd(&w0, &w2, &w1, x[10], y[18]); + word3_muladd(&w0, &w2, &w1, x[11], y[17]); + word3_muladd(&w0, &w2, &w1, x[12], y[16]); + word3_muladd(&w0, &w2, &w1, x[13], y[15]); + word3_muladd(&w0, &w2, &w1, x[14], y[14]); + word3_muladd(&w0, &w2, &w1, x[15], y[13]); + word3_muladd(&w0, &w2, &w1, x[16], y[12]); + word3_muladd(&w0, &w2, &w1, x[17], y[11]); + word3_muladd(&w0, &w2, &w1, x[18], y[10]); + word3_muladd(&w0, &w2, &w1, x[19], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[20], y[ 8]); + word3_muladd(&w0, &w2, &w1, x[21], y[ 7]); + word3_muladd(&w0, &w2, &w1, x[22], y[ 6]); + word3_muladd(&w0, &w2, &w1, x[23], y[ 5]); + z[28] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 6], y[23]); + word3_muladd(&w1, &w0, &w2, x[ 7], y[22]); + word3_muladd(&w1, &w0, &w2, x[ 8], y[21]); + word3_muladd(&w1, &w0, &w2, x[ 9], y[20]); + word3_muladd(&w1, &w0, &w2, x[10], y[19]); + word3_muladd(&w1, &w0, &w2, x[11], y[18]); + word3_muladd(&w1, &w0, &w2, x[12], y[17]); + word3_muladd(&w1, &w0, &w2, x[13], y[16]); + word3_muladd(&w1, &w0, &w2, x[14], y[15]); + word3_muladd(&w1, &w0, &w2, x[15], y[14]); + word3_muladd(&w1, &w0, &w2, x[16], y[13]); + word3_muladd(&w1, &w0, &w2, x[17], y[12]); + word3_muladd(&w1, &w0, &w2, x[18], y[11]); + word3_muladd(&w1, &w0, &w2, x[19], y[10]); + word3_muladd(&w1, &w0, &w2, x[20], y[ 9]); + word3_muladd(&w1, &w0, &w2, x[21], y[ 8]); + word3_muladd(&w1, &w0, &w2, x[22], y[ 7]); + word3_muladd(&w1, &w0, &w2, x[23], y[ 6]); + z[29] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[ 7], y[23]); + word3_muladd(&w2, &w1, &w0, x[ 8], y[22]); + word3_muladd(&w2, &w1, &w0, x[ 9], y[21]); + word3_muladd(&w2, &w1, &w0, x[10], y[20]); + word3_muladd(&w2, &w1, &w0, x[11], y[19]); + word3_muladd(&w2, &w1, &w0, x[12], y[18]); + word3_muladd(&w2, &w1, &w0, x[13], y[17]); + word3_muladd(&w2, &w1, &w0, x[14], y[16]); + word3_muladd(&w2, &w1, &w0, x[15], y[15]); + word3_muladd(&w2, &w1, &w0, x[16], y[14]); + word3_muladd(&w2, &w1, &w0, x[17], y[13]); + word3_muladd(&w2, &w1, &w0, x[18], y[12]); + word3_muladd(&w2, &w1, &w0, x[19], y[11]); + word3_muladd(&w2, &w1, &w0, x[20], y[10]); + word3_muladd(&w2, &w1, &w0, x[21], y[ 9]); + word3_muladd(&w2, &w1, &w0, x[22], y[ 8]); + word3_muladd(&w2, &w1, &w0, x[23], y[ 7]); + z[30] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[ 8], y[23]); + word3_muladd(&w0, &w2, &w1, x[ 9], y[22]); + word3_muladd(&w0, &w2, &w1, x[10], y[21]); + word3_muladd(&w0, &w2, &w1, x[11], y[20]); + word3_muladd(&w0, &w2, &w1, x[12], y[19]); + word3_muladd(&w0, &w2, &w1, x[13], y[18]); + word3_muladd(&w0, &w2, &w1, x[14], y[17]); + word3_muladd(&w0, &w2, &w1, x[15], y[16]); + word3_muladd(&w0, &w2, &w1, x[16], y[15]); + word3_muladd(&w0, &w2, &w1, x[17], y[14]); + word3_muladd(&w0, &w2, &w1, x[18], y[13]); + word3_muladd(&w0, &w2, &w1, x[19], y[12]); + word3_muladd(&w0, &w2, &w1, x[20], y[11]); + word3_muladd(&w0, &w2, &w1, x[21], y[10]); + word3_muladd(&w0, &w2, &w1, x[22], y[ 9]); + word3_muladd(&w0, &w2, &w1, x[23], y[ 8]); + z[31] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[ 9], y[23]); + word3_muladd(&w1, &w0, &w2, x[10], y[22]); + word3_muladd(&w1, &w0, &w2, x[11], y[21]); + word3_muladd(&w1, &w0, &w2, x[12], y[20]); + word3_muladd(&w1, &w0, &w2, x[13], y[19]); + word3_muladd(&w1, &w0, &w2, x[14], y[18]); + word3_muladd(&w1, &w0, &w2, x[15], y[17]); + word3_muladd(&w1, &w0, &w2, x[16], y[16]); + word3_muladd(&w1, &w0, &w2, x[17], y[15]); + word3_muladd(&w1, &w0, &w2, x[18], y[14]); + word3_muladd(&w1, &w0, &w2, x[19], y[13]); + word3_muladd(&w1, &w0, &w2, x[20], y[12]); + word3_muladd(&w1, &w0, &w2, x[21], y[11]); + word3_muladd(&w1, &w0, &w2, x[22], y[10]); + word3_muladd(&w1, &w0, &w2, x[23], y[ 9]); + z[32] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[10], y[23]); + word3_muladd(&w2, &w1, &w0, x[11], y[22]); + word3_muladd(&w2, &w1, &w0, x[12], y[21]); + word3_muladd(&w2, &w1, &w0, x[13], y[20]); + word3_muladd(&w2, &w1, &w0, x[14], y[19]); + word3_muladd(&w2, &w1, &w0, x[15], y[18]); + word3_muladd(&w2, &w1, &w0, x[16], y[17]); + word3_muladd(&w2, &w1, &w0, x[17], y[16]); + word3_muladd(&w2, &w1, &w0, x[18], y[15]); + word3_muladd(&w2, &w1, &w0, x[19], y[14]); + word3_muladd(&w2, &w1, &w0, x[20], y[13]); + word3_muladd(&w2, &w1, &w0, x[21], y[12]); + word3_muladd(&w2, &w1, &w0, x[22], y[11]); + word3_muladd(&w2, &w1, &w0, x[23], y[10]); + z[33] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[11], y[23]); + word3_muladd(&w0, &w2, &w1, x[12], y[22]); + word3_muladd(&w0, &w2, &w1, x[13], y[21]); + word3_muladd(&w0, &w2, &w1, x[14], y[20]); + word3_muladd(&w0, &w2, &w1, x[15], y[19]); + word3_muladd(&w0, &w2, &w1, x[16], y[18]); + word3_muladd(&w0, &w2, &w1, x[17], y[17]); + word3_muladd(&w0, &w2, &w1, x[18], y[16]); + word3_muladd(&w0, &w2, &w1, x[19], y[15]); + word3_muladd(&w0, &w2, &w1, x[20], y[14]); + word3_muladd(&w0, &w2, &w1, x[21], y[13]); + word3_muladd(&w0, &w2, &w1, x[22], y[12]); + word3_muladd(&w0, &w2, &w1, x[23], y[11]); + z[34] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[12], y[23]); + word3_muladd(&w1, &w0, &w2, x[13], y[22]); + word3_muladd(&w1, &w0, &w2, x[14], y[21]); + word3_muladd(&w1, &w0, &w2, x[15], y[20]); + word3_muladd(&w1, &w0, &w2, x[16], y[19]); + word3_muladd(&w1, &w0, &w2, x[17], y[18]); + word3_muladd(&w1, &w0, &w2, x[18], y[17]); + word3_muladd(&w1, &w0, &w2, x[19], y[16]); + word3_muladd(&w1, &w0, &w2, x[20], y[15]); + word3_muladd(&w1, &w0, &w2, x[21], y[14]); + word3_muladd(&w1, &w0, &w2, x[22], y[13]); + word3_muladd(&w1, &w0, &w2, x[23], y[12]); + z[35] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[13], y[23]); + word3_muladd(&w2, &w1, &w0, x[14], y[22]); + word3_muladd(&w2, &w1, &w0, x[15], y[21]); + word3_muladd(&w2, &w1, &w0, x[16], y[20]); + word3_muladd(&w2, &w1, &w0, x[17], y[19]); + word3_muladd(&w2, &w1, &w0, x[18], y[18]); + word3_muladd(&w2, &w1, &w0, x[19], y[17]); + word3_muladd(&w2, &w1, &w0, x[20], y[16]); + word3_muladd(&w2, &w1, &w0, x[21], y[15]); + word3_muladd(&w2, &w1, &w0, x[22], y[14]); + word3_muladd(&w2, &w1, &w0, x[23], y[13]); + z[36] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[14], y[23]); + word3_muladd(&w0, &w2, &w1, x[15], y[22]); + word3_muladd(&w0, &w2, &w1, x[16], y[21]); + word3_muladd(&w0, &w2, &w1, x[17], y[20]); + word3_muladd(&w0, &w2, &w1, x[18], y[19]); + word3_muladd(&w0, &w2, &w1, x[19], y[18]); + word3_muladd(&w0, &w2, &w1, x[20], y[17]); + word3_muladd(&w0, &w2, &w1, x[21], y[16]); + word3_muladd(&w0, &w2, &w1, x[22], y[15]); + word3_muladd(&w0, &w2, &w1, x[23], y[14]); + z[37] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[15], y[23]); + word3_muladd(&w1, &w0, &w2, x[16], y[22]); + word3_muladd(&w1, &w0, &w2, x[17], y[21]); + word3_muladd(&w1, &w0, &w2, x[18], y[20]); + word3_muladd(&w1, &w0, &w2, x[19], y[19]); + word3_muladd(&w1, &w0, &w2, x[20], y[18]); + word3_muladd(&w1, &w0, &w2, x[21], y[17]); + word3_muladd(&w1, &w0, &w2, x[22], y[16]); + word3_muladd(&w1, &w0, &w2, x[23], y[15]); + z[38] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[16], y[23]); + word3_muladd(&w2, &w1, &w0, x[17], y[22]); + word3_muladd(&w2, &w1, &w0, x[18], y[21]); + word3_muladd(&w2, &w1, &w0, x[19], y[20]); + word3_muladd(&w2, &w1, &w0, x[20], y[19]); + word3_muladd(&w2, &w1, &w0, x[21], y[18]); + word3_muladd(&w2, &w1, &w0, x[22], y[17]); + word3_muladd(&w2, &w1, &w0, x[23], y[16]); + z[39] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[17], y[23]); + word3_muladd(&w0, &w2, &w1, x[18], y[22]); + word3_muladd(&w0, &w2, &w1, x[19], y[21]); + word3_muladd(&w0, &w2, &w1, x[20], y[20]); + word3_muladd(&w0, &w2, &w1, x[21], y[19]); + word3_muladd(&w0, &w2, &w1, x[22], y[18]); + word3_muladd(&w0, &w2, &w1, x[23], y[17]); + z[40] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[18], y[23]); + word3_muladd(&w1, &w0, &w2, x[19], y[22]); + word3_muladd(&w1, &w0, &w2, x[20], y[21]); + word3_muladd(&w1, &w0, &w2, x[21], y[20]); + word3_muladd(&w1, &w0, &w2, x[22], y[19]); + word3_muladd(&w1, &w0, &w2, x[23], y[18]); + z[41] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[19], y[23]); + word3_muladd(&w2, &w1, &w0, x[20], y[22]); + word3_muladd(&w2, &w1, &w0, x[21], y[21]); + word3_muladd(&w2, &w1, &w0, x[22], y[20]); + word3_muladd(&w2, &w1, &w0, x[23], y[19]); + z[42] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[20], y[23]); + word3_muladd(&w0, &w2, &w1, x[21], y[22]); + word3_muladd(&w0, &w2, &w1, x[22], y[21]); + word3_muladd(&w0, &w2, &w1, x[23], y[20]); + z[43] = w1; w1 = 0; + + word3_muladd(&w1, &w0, &w2, x[21], y[23]); + word3_muladd(&w1, &w0, &w2, x[22], y[22]); + word3_muladd(&w1, &w0, &w2, x[23], y[21]); + z[44] = w2; w2 = 0; + + word3_muladd(&w2, &w1, &w0, x[22], y[23]); + word3_muladd(&w2, &w1, &w0, x[23], y[22]); + z[45] = w0; w0 = 0; + + word3_muladd(&w0, &w2, &w1, x[23], y[23]); + z[46] = w1; + z[47] = w2; + } + +} diff --git a/comm/third_party/botan/src/lib/math/mp/mp_core.h b/comm/third_party/botan/src/lib/math/mp/mp_core.h new file mode 100644 index 0000000000..c4bf8e8815 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_core.h @@ -0,0 +1,819 @@ +/* +* MPI Algorithms +* (C) 1999-2010,2018 Jack Lloyd +* 2006 Luca Piccarreta +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_CORE_OPS_H_ +#define BOTAN_MP_CORE_OPS_H_ + +#include <botan/types.h> +#include <botan/exceptn.h> +#include <botan/mem_ops.h> +#include <botan/internal/mp_asmi.h> +#include <botan/internal/ct_utils.h> +#include <algorithm> + +namespace Botan { + +const word MP_WORD_MAX = ~static_cast<word>(0); + +/* +* If cond == 0, does nothing. +* If cond > 0, swaps x[0:size] with y[0:size] +* Runs in constant time +*/ +inline void bigint_cnd_swap(word cnd, word x[], word y[], size_t size) + { + const auto mask = CT::Mask<word>::expand(cnd); + + for(size_t i = 0; i != size; ++i) + { + const word a = x[i]; + const word b = y[i]; + x[i] = mask.select(b, a); + y[i] = mask.select(a, b); + } + } + +inline word bigint_cnd_add(word cnd, word x[], word x_size, + const word y[], size_t y_size) + { + BOTAN_ASSERT(x_size >= y_size, "Expected sizes"); + + const auto mask = CT::Mask<word>::expand(cnd); + + word carry = 0; + + const size_t blocks = y_size - (y_size % 8); + word z[8] = { 0 }; + + for(size_t i = 0; i != blocks; i += 8) + { + carry = word8_add3(z, x + i, y + i, carry); + mask.select_n(x + i, z, x + i, 8); + } + + for(size_t i = blocks; i != y_size; ++i) + { + z[0] = word_add(x[i], y[i], &carry); + x[i] = mask.select(z[0], x[i]); + } + + for(size_t i = y_size; i != x_size; ++i) + { + z[0] = word_add(x[i], 0, &carry); + x[i] = mask.select(z[0], x[i]); + } + + return mask.if_set_return(carry); + } + +/* +* If cond > 0 adds x[0:size] and y[0:size] and returns carry +* Runs in constant time +*/ +inline word bigint_cnd_add(word cnd, word x[], const word y[], size_t size) + { + return bigint_cnd_add(cnd, x, size, y, size); + } + +/* +* If cond > 0 subtracts x[0:size] and y[0:size] and returns borrow +* Runs in constant time +*/ +inline word bigint_cnd_sub(word cnd, + word x[], size_t x_size, + const word y[], size_t y_size) + { + BOTAN_ASSERT(x_size >= y_size, "Expected sizes"); + + const auto mask = CT::Mask<word>::expand(cnd); + + word carry = 0; + + const size_t blocks = y_size - (y_size % 8); + word z[8] = { 0 }; + + for(size_t i = 0; i != blocks; i += 8) + { + carry = word8_sub3(z, x + i, y + i, carry); + mask.select_n(x + i, z, x + i, 8); + } + + for(size_t i = blocks; i != y_size; ++i) + { + z[0] = word_sub(x[i], y[i], &carry); + x[i] = mask.select(z[0], x[i]); + } + + for(size_t i = y_size; i != x_size; ++i) + { + z[0] = word_sub(x[i], 0, &carry); + x[i] = mask.select(z[0], x[i]); + } + + return mask.if_set_return(carry); + } + +/* +* If cond > 0 adds x[0:size] and y[0:size] and returns carry +* Runs in constant time +*/ +inline word bigint_cnd_sub(word cnd, word x[], const word y[], size_t size) + { + return bigint_cnd_sub(cnd, x, size, y, size); + } + + +/* +* Equivalent to +* bigint_cnd_add( mask, x, y, size); +* bigint_cnd_sub(~mask, x, y, size); +* +* Mask must be either 0 or all 1 bits +*/ +inline void bigint_cnd_add_or_sub(CT::Mask<word> mask, word x[], const word y[], size_t size) + { + const size_t blocks = size - (size % 8); + + word carry = 0; + word borrow = 0; + + word t0[8] = { 0 }; + word t1[8] = { 0 }; + + for(size_t i = 0; i != blocks; i += 8) + { + carry = word8_add3(t0, x + i, y + i, carry); + borrow = word8_sub3(t1, x + i, y + i, borrow); + + for(size_t j = 0; j != 8; ++j) + x[i+j] = mask.select(t0[j], t1[j]); + } + + for(size_t i = blocks; i != size; ++i) + { + const word a = word_add(x[i], y[i], &carry); + const word s = word_sub(x[i], y[i], &borrow); + + x[i] = mask.select(a, s); + } + } + +/* +* Equivalent to +* bigint_cnd_add( mask, x, size, y, size); +* bigint_cnd_sub(~mask, x, size, z, size); +* +* Mask must be either 0 or all 1 bits +* +* Returns the carry or borrow resp +*/ +inline word bigint_cnd_addsub(CT::Mask<word> mask, word x[], + const word y[], const word z[], + size_t size) + { + const size_t blocks = size - (size % 8); + + word carry = 0; + word borrow = 0; + + word t0[8] = { 0 }; + word t1[8] = { 0 }; + + for(size_t i = 0; i != blocks; i += 8) + { + carry = word8_add3(t0, x + i, y + i, carry); + borrow = word8_sub3(t1, x + i, z + i, borrow); + + for(size_t j = 0; j != 8; ++j) + x[i+j] = mask.select(t0[j], t1[j]); + } + + for(size_t i = blocks; i != size; ++i) + { + t0[0] = word_add(x[i], y[i], &carry); + t1[0] = word_sub(x[i], z[i], &borrow); + x[i] = mask.select(t0[0], t1[0]); + } + + return mask.select(carry, borrow); + } + +/* +* 2s complement absolute value +* If cond > 0 sets x to ~x + 1 +* Runs in constant time +*/ +inline void bigint_cnd_abs(word cnd, word x[], size_t size) + { + const auto mask = CT::Mask<word>::expand(cnd); + + word carry = mask.if_set_return(1); + for(size_t i = 0; i != size; ++i) + { + const word z = word_add(~x[i], 0, &carry); + x[i] = mask.select(z, x[i]); + } + } + +/** +* Two operand addition with carry out +*/ +inline word bigint_add2_nc(word x[], size_t x_size, const word y[], size_t y_size) + { + word carry = 0; + + BOTAN_ASSERT(x_size >= y_size, "Expected sizes"); + + const size_t blocks = y_size - (y_size % 8); + + for(size_t i = 0; i != blocks; i += 8) + carry = word8_add2(x + i, y + i, carry); + + for(size_t i = blocks; i != y_size; ++i) + x[i] = word_add(x[i], y[i], &carry); + + for(size_t i = y_size; i != x_size; ++i) + x[i] = word_add(x[i], 0, &carry); + + return carry; + } + +/** +* Three operand addition with carry out +*/ +inline word bigint_add3_nc(word z[], + const word x[], size_t x_size, + const word y[], size_t y_size) + { + if(x_size < y_size) + { return bigint_add3_nc(z, y, y_size, x, x_size); } + + word carry = 0; + + const size_t blocks = y_size - (y_size % 8); + + for(size_t i = 0; i != blocks; i += 8) + carry = word8_add3(z + i, x + i, y + i, carry); + + for(size_t i = blocks; i != y_size; ++i) + z[i] = word_add(x[i], y[i], &carry); + + for(size_t i = y_size; i != x_size; ++i) + z[i] = word_add(x[i], 0, &carry); + + return carry; + } + +/** +* Two operand addition +* @param x the first operand (and output) +* @param x_size size of x +* @param y the second operand +* @param y_size size of y (must be >= x_size) +*/ +inline void bigint_add2(word x[], size_t x_size, + const word y[], size_t y_size) + { + x[x_size] += bigint_add2_nc(x, x_size, y, y_size); + } + +/** +* Three operand addition +*/ +inline void bigint_add3(word z[], + const word x[], size_t x_size, + const word y[], size_t y_size) + { + z[x_size > y_size ? x_size : y_size] += + bigint_add3_nc(z, x, x_size, y, y_size); + } + +/** +* Two operand subtraction +*/ +inline word bigint_sub2(word x[], size_t x_size, + const word y[], size_t y_size) + { + word borrow = 0; + + BOTAN_ASSERT(x_size >= y_size, "Expected sizes"); + + const size_t blocks = y_size - (y_size % 8); + + for(size_t i = 0; i != blocks; i += 8) + borrow = word8_sub2(x + i, y + i, borrow); + + for(size_t i = blocks; i != y_size; ++i) + x[i] = word_sub(x[i], y[i], &borrow); + + for(size_t i = y_size; i != x_size; ++i) + x[i] = word_sub(x[i], 0, &borrow); + + return borrow; + } + +/** +* Two operand subtraction, x = y - x; assumes y >= x +*/ +inline void bigint_sub2_rev(word x[], const word y[], size_t y_size) + { + word borrow = 0; + + const size_t blocks = y_size - (y_size % 8); + + for(size_t i = 0; i != blocks; i += 8) + borrow = word8_sub2_rev(x + i, y + i, borrow); + + for(size_t i = blocks; i != y_size; ++i) + x[i] = word_sub(y[i], x[i], &borrow); + + BOTAN_ASSERT(borrow == 0, "y must be greater than x"); + } + +/** +* Three operand subtraction +*/ +inline word bigint_sub3(word z[], + const word x[], size_t x_size, + const word y[], size_t y_size) + { + word borrow = 0; + + BOTAN_ASSERT(x_size >= y_size, "Expected sizes"); + + const size_t blocks = y_size - (y_size % 8); + + for(size_t i = 0; i != blocks; i += 8) + borrow = word8_sub3(z + i, x + i, y + i, borrow); + + for(size_t i = blocks; i != y_size; ++i) + z[i] = word_sub(x[i], y[i], &borrow); + + for(size_t i = y_size; i != x_size; ++i) + z[i] = word_sub(x[i], 0, &borrow); + + return borrow; + } + +/** +* Return abs(x-y), ie if x >= y, then compute z = x - y +* Otherwise compute z = y - x +* No borrow is possible since the result is always >= 0 +* +* Returns ~0 if x >= y or 0 if x < y +* @param z output array of at least N words +* @param x input array of N words +* @param y input array of N words +* @param N length of x and y +* @param ws array of at least 2*N words +*/ +inline CT::Mask<word> +bigint_sub_abs(word z[], + const word x[], const word y[], size_t N, + word ws[]) + { + // Subtract in both direction then conditional copy out the result + + word* ws0 = ws; + word* ws1 = ws + N; + + word borrow0 = 0; + word borrow1 = 0; + + const size_t blocks = N - (N % 8); + + for(size_t i = 0; i != blocks; i += 8) + { + borrow0 = word8_sub3(ws0 + i, x + i, y + i, borrow0); + borrow1 = word8_sub3(ws1 + i, y + i, x + i, borrow1); + } + + for(size_t i = blocks; i != N; ++i) + { + ws0[i] = word_sub(x[i], y[i], &borrow0); + ws1[i] = word_sub(y[i], x[i], &borrow1); + } + + return CT::conditional_copy_mem(borrow0, z, ws1, ws0, N); + } + +/* +* Shift Operations +*/ +inline void bigint_shl1(word x[], size_t x_size, size_t x_words, + size_t word_shift, size_t bit_shift) + { + copy_mem(x + word_shift, x, x_words); + clear_mem(x, word_shift); + + const auto carry_mask = CT::Mask<word>::expand(bit_shift); + const size_t carry_shift = carry_mask.if_set_return(BOTAN_MP_WORD_BITS - bit_shift); + + word carry = 0; + for(size_t i = word_shift; i != x_size; ++i) + { + const word w = x[i]; + x[i] = (w << bit_shift) | carry; + carry = carry_mask.if_set_return(w >> carry_shift); + } + } + +inline void bigint_shr1(word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + const size_t top = x_size >= word_shift ? (x_size - word_shift) : 0; + + if(top > 0) + copy_mem(x, x + word_shift, top); + clear_mem(x + top, std::min(word_shift, x_size)); + + const auto carry_mask = CT::Mask<word>::expand(bit_shift); + const size_t carry_shift = carry_mask.if_set_return(BOTAN_MP_WORD_BITS - bit_shift); + + word carry = 0; + + for(size_t i = 0; i != top; ++i) + { + const word w = x[top - i - 1]; + x[top-i-1] = (w >> bit_shift) | carry; + carry = carry_mask.if_set_return(w << carry_shift); + } + } + +inline void bigint_shl2(word y[], const word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + copy_mem(y + word_shift, x, x_size); + + const auto carry_mask = CT::Mask<word>::expand(bit_shift); + const size_t carry_shift = carry_mask.if_set_return(BOTAN_MP_WORD_BITS - bit_shift); + + word carry = 0; + for(size_t i = word_shift; i != x_size + word_shift + 1; ++i) + { + const word w = y[i]; + y[i] = (w << bit_shift) | carry; + carry = carry_mask.if_set_return(w >> carry_shift); + } + } + +inline void bigint_shr2(word y[], const word x[], size_t x_size, + size_t word_shift, size_t bit_shift) + { + const size_t new_size = x_size < word_shift ? 0 : (x_size - word_shift); + + if(new_size > 0) + copy_mem(y, x + word_shift, new_size); + + const auto carry_mask = CT::Mask<word>::expand(bit_shift); + const size_t carry_shift = carry_mask.if_set_return(BOTAN_MP_WORD_BITS - bit_shift); + + word carry = 0; + for(size_t i = new_size; i > 0; --i) + { + word w = y[i-1]; + y[i-1] = (w >> bit_shift) | carry; + carry = carry_mask.if_set_return(w << carry_shift); + } + } + +/* +* Linear Multiply - returns the carry +*/ +inline word BOTAN_WARN_UNUSED_RESULT bigint_linmul2(word x[], size_t x_size, word y) + { + const size_t blocks = x_size - (x_size % 8); + + word carry = 0; + + for(size_t i = 0; i != blocks; i += 8) + carry = word8_linmul2(x + i, y, carry); + + for(size_t i = blocks; i != x_size; ++i) + x[i] = word_madd2(x[i], y, &carry); + + return carry; + } + +inline void bigint_linmul3(word z[], const word x[], size_t x_size, word y) + { + const size_t blocks = x_size - (x_size % 8); + + word carry = 0; + + for(size_t i = 0; i != blocks; i += 8) + carry = word8_linmul3(z + i, x + i, y, carry); + + for(size_t i = blocks; i != x_size; ++i) + z[i] = word_madd2(x[i], y, &carry); + + z[x_size] = carry; + } + +/** +* Compare x and y +* Return -1 if x < y +* Return 0 if x == y +* Return 1 if x > y +*/ +inline int32_t bigint_cmp(const word x[], size_t x_size, + const word y[], size_t y_size) + { + static_assert(sizeof(word) >= sizeof(uint32_t), "Size assumption"); + + const word LT = static_cast<word>(-1); + const word EQ = 0; + const word GT = 1; + + const size_t common_elems = std::min(x_size, y_size); + + word result = EQ; // until found otherwise + + for(size_t i = 0; i != common_elems; i++) + { + const auto is_eq = CT::Mask<word>::is_equal(x[i], y[i]); + const auto is_lt = CT::Mask<word>::is_lt(x[i], y[i]); + + result = is_eq.select(result, is_lt.select(LT, GT)); + } + + if(x_size < y_size) + { + word mask = 0; + for(size_t i = x_size; i != y_size; i++) + mask |= y[i]; + + // If any bits were set in high part of y, then x < y + result = CT::Mask<word>::is_zero(mask).select(result, LT); + } + else if(y_size < x_size) + { + word mask = 0; + for(size_t i = y_size; i != x_size; i++) + mask |= x[i]; + + // If any bits were set in high part of x, then x > y + result = CT::Mask<word>::is_zero(mask).select(result, GT); + } + + CT::unpoison(result); + BOTAN_DEBUG_ASSERT(result == LT || result == GT || result == EQ); + return static_cast<int32_t>(result); + } + +/** +* Compare x and y +* Return ~0 if x[0:x_size] < y[0:y_size] or 0 otherwise +* If lt_or_equal is true, returns ~0 also for x == y +*/ +inline CT::Mask<word> +bigint_ct_is_lt(const word x[], size_t x_size, + const word y[], size_t y_size, + bool lt_or_equal = false) + { + const size_t common_elems = std::min(x_size, y_size); + + auto is_lt = CT::Mask<word>::expand(lt_or_equal); + + for(size_t i = 0; i != common_elems; i++) + { + const auto eq = CT::Mask<word>::is_equal(x[i], y[i]); + const auto lt = CT::Mask<word>::is_lt(x[i], y[i]); + is_lt = eq.select_mask(is_lt, lt); + } + + if(x_size < y_size) + { + word mask = 0; + for(size_t i = x_size; i != y_size; i++) + mask |= y[i]; + // If any bits were set in high part of y, then is_lt should be forced true + is_lt |= CT::Mask<word>::expand(mask); + } + else if(y_size < x_size) + { + word mask = 0; + for(size_t i = y_size; i != x_size; i++) + mask |= x[i]; + + // If any bits were set in high part of x, then is_lt should be false + is_lt &= CT::Mask<word>::is_zero(mask); + } + + return is_lt; + } + +inline CT::Mask<word> +bigint_ct_is_eq(const word x[], size_t x_size, + const word y[], size_t y_size) + { + const size_t common_elems = std::min(x_size, y_size); + + word diff = 0; + + for(size_t i = 0; i != common_elems; i++) + { + diff |= (x[i] ^ y[i]); + } + + // If any bits were set in high part of x/y, then they are not equal + if(x_size < y_size) + { + for(size_t i = x_size; i != y_size; i++) + diff |= y[i]; + } + else if(y_size < x_size) + { + for(size_t i = y_size; i != x_size; i++) + diff |= x[i]; + } + + return CT::Mask<word>::is_zero(diff); + } + +/** +* Set z to abs(x-y), ie if x >= y, then compute z = x - y +* Otherwise compute z = y - x +* No borrow is possible since the result is always >= 0 +* +* Return the relative size of x vs y (-1, 0, 1) +* +* @param z output array of max(x_size,y_size) words +* @param x input param +* @param x_size length of x +* @param y input param +* @param y_size length of y +*/ +inline int32_t +bigint_sub_abs(word z[], + const word x[], size_t x_size, + const word y[], size_t y_size) + { + const int32_t relative_size = bigint_cmp(x, x_size, y, y_size); + + // Swap if relative_size == -1 + const bool need_swap = relative_size < 0; + CT::conditional_swap_ptr(need_swap, x, y); + CT::conditional_swap(need_swap, x_size, y_size); + + /* + * We know at this point that x >= y so if y_size is larger than + * x_size, we are guaranteed they are just leading zeros which can + * be ignored + */ + y_size = std::min(x_size, y_size); + + bigint_sub3(z, x, x_size, y, y_size); + + return relative_size; + } + +/** +* Set t to t-s modulo mod +* +* @param t first integer +* @param s second integer +* @param mod the modulus +* @param mod_sw size of t, s, and mod +* @param ws workspace of size mod_sw +*/ +inline void +bigint_mod_sub(word t[], const word s[], const word mod[], size_t mod_sw, word ws[]) + { + // is t < s or not? + const auto is_lt = bigint_ct_is_lt(t, mod_sw, s, mod_sw); + + // ws = p - s + const word borrow = bigint_sub3(ws, mod, mod_sw, s, mod_sw); + + // Compute either (t - s) or (t + (p - s)) depending on mask + const word carry = bigint_cnd_addsub(is_lt, t, ws, s, mod_sw); + + BOTAN_DEBUG_ASSERT(borrow == 0 && carry == 0); + BOTAN_UNUSED(carry, borrow); + } + +template<size_t N> +inline void bigint_mod_sub_n(word t[], const word s[], const word mod[], word ws[]) + { + // is t < s or not? + const auto is_lt = bigint_ct_is_lt(t, N, s, N); + + // ws = p - s + const word borrow = bigint_sub3(ws, mod, N, s, N); + + // Compute either (t - s) or (t + (p - s)) depending on mask + const word carry = bigint_cnd_addsub(is_lt, t, ws, s, N); + + BOTAN_DEBUG_ASSERT(borrow == 0 && carry == 0); + BOTAN_UNUSED(carry, borrow); + } + +/** +* Compute ((n1<<bits) + n0) / d +*/ +inline word bigint_divop(word n1, word n0, word d) + { + if(d == 0) + throw Invalid_Argument("bigint_divop divide by zero"); + +#if defined(BOTAN_HAS_MP_DWORD) + return ((static_cast<dword>(n1) << BOTAN_MP_WORD_BITS) | n0) / d; +#else + + word high = n1 % d; + word quotient = 0; + + for(size_t i = 0; i != BOTAN_MP_WORD_BITS; ++i) + { + const word high_top_bit = high >> (BOTAN_MP_WORD_BITS-1); + + high <<= 1; + high |= (n0 >> (BOTAN_MP_WORD_BITS-1-i)) & 1; + quotient <<= 1; + + if(high_top_bit || high >= d) + { + high -= d; + quotient |= 1; + } + } + + return quotient; +#endif + } + +/** +* Compute ((n1<<bits) + n0) % d +*/ +inline word bigint_modop(word n1, word n0, word d) + { + if(d == 0) + throw Invalid_Argument("bigint_modop divide by zero"); + +#if defined(BOTAN_HAS_MP_DWORD) + return ((static_cast<dword>(n1) << BOTAN_MP_WORD_BITS) | n0) % d; +#else + word z = bigint_divop(n1, n0, d); + word dummy = 0; + z = word_madd2(z, d, &dummy); + return (n0-z); +#endif + } + +/* +* Comba Multiplication / Squaring +*/ +void bigint_comba_mul4(word z[8], const word x[4], const word y[4]); +void bigint_comba_mul6(word z[12], const word x[6], const word y[6]); +void bigint_comba_mul8(word z[16], const word x[8], const word y[8]); +void bigint_comba_mul9(word z[18], const word x[9], const word y[9]); +void bigint_comba_mul16(word z[32], const word x[16], const word y[16]); +void bigint_comba_mul24(word z[48], const word x[24], const word y[24]); + +void bigint_comba_sqr4(word out[8], const word in[4]); +void bigint_comba_sqr6(word out[12], const word in[6]); +void bigint_comba_sqr8(word out[16], const word in[8]); +void bigint_comba_sqr9(word out[18], const word in[9]); +void bigint_comba_sqr16(word out[32], const word in[16]); +void bigint_comba_sqr24(word out[48], const word in[24]); + +/** +* Montgomery Reduction +* @param z integer to reduce, of size exactly 2*(p_size+1). + Output is in the first p_size+1 words, higher + words are set to zero. +* @param p modulus +* @param p_size size of p +* @param p_dash Montgomery value +* @param workspace array of at least 2*(p_size+1) words +* @param ws_size size of workspace in words +*/ +void bigint_monty_redc(word z[], + const word p[], size_t p_size, + word p_dash, + word workspace[], + size_t ws_size); + +/* +* High Level Multiplication/Squaring Interfaces +*/ + +void bigint_mul(word z[], size_t z_size, + const word x[], size_t x_size, size_t x_sw, + const word y[], size_t y_size, size_t y_sw, + word workspace[], size_t ws_size); + +void bigint_sqr(word z[], size_t z_size, + const word x[], size_t x_size, size_t x_sw, + word workspace[], size_t ws_size); + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/mp/mp_karat.cpp b/comm/third_party/botan/src/lib/math/mp/mp_karat.cpp new file mode 100644 index 0000000000..15fcafa5be --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_karat.cpp @@ -0,0 +1,408 @@ +/* +* Multiplication and Squaring +* (C) 1999-2010,2018 Jack Lloyd +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/internal/mp_core.h> +#include <botan/internal/mp_asmi.h> +#include <botan/internal/ct_utils.h> +#include <botan/mem_ops.h> +#include <botan/exceptn.h> + +namespace Botan { + +namespace { + +const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32; +const size_t KARATSUBA_SQUARE_THRESHOLD = 32; + +/* +* Simple O(N^2) Multiplication +*/ +void basecase_mul(word z[], size_t z_size, + const word x[], size_t x_size, + const word y[], size_t y_size) + { + if(z_size < x_size + y_size) + throw Invalid_Argument("basecase_mul z_size too small"); + + const size_t x_size_8 = x_size - (x_size % 8); + + clear_mem(z, z_size); + + for(size_t i = 0; i != y_size; ++i) + { + const word y_i = y[i]; + + word carry = 0; + + for(size_t j = 0; j != x_size_8; j += 8) + carry = word8_madd3(z + i + j, x + j, y_i, carry); + + for(size_t j = x_size_8; j != x_size; ++j) + z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry); + + z[x_size+i] = carry; + } + } + +void basecase_sqr(word z[], size_t z_size, + const word x[], size_t x_size) + { + if(z_size < 2*x_size) + throw Invalid_Argument("basecase_sqr z_size too small"); + + const size_t x_size_8 = x_size - (x_size % 8); + + clear_mem(z, z_size); + + for(size_t i = 0; i != x_size; ++i) + { + const word x_i = x[i]; + + word carry = 0; + + for(size_t j = 0; j != x_size_8; j += 8) + carry = word8_madd3(z + i + j, x + j, x_i, carry); + + for(size_t j = x_size_8; j != x_size; ++j) + z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry); + + z[x_size+i] = carry; + } + } + +/* +* Karatsuba Multiplication Operation +*/ +void karatsuba_mul(word z[], const word x[], const word y[], size_t N, + word workspace[]) + { + if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) + { + switch(N) + { + case 6: + return bigint_comba_mul6(z, x, y); + case 8: + return bigint_comba_mul8(z, x, y); + case 9: + return bigint_comba_mul9(z, x, y); + case 16: + return bigint_comba_mul16(z, x, y); + case 24: + return bigint_comba_mul24(z, x, y); + default: + return basecase_mul(z, 2*N, x, N, y, N); + } + } + + const size_t N2 = N / 2; + + const word* x0 = x; + const word* x1 = x + N2; + const word* y0 = y; + const word* y1 = y + N2; + word* z0 = z; + word* z1 = z + N; + + word* ws0 = workspace; + word* ws1 = workspace + N; + + clear_mem(workspace, 2*N); + + /* + * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here, + * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do + * anything, clear_mem above already set the correct result. + * + * However we ignore the result of the comparisons and always perform the + * subtractions and recursively multiply to avoid the timing channel. + */ + + // First compute (X_lo - X_hi)*(Y_hi - Y_lo) + const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace); + const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace); + const auto neg_mask = ~(cmp0 ^ cmp1); + + karatsuba_mul(ws0, z0, z1, N2, ws1); + + // Compute X_lo * Y_lo + karatsuba_mul(z0, x0, y0, N2, ws1); + + // Compute X_hi * Y_hi + karatsuba_mul(z1, x1, y1, N2, ws1); + + const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); + word z_carry = bigint_add2_nc(z + N2, N, ws1, N); + + z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); + bigint_add2_nc(z + N + N2, N2, &z_carry, 1); + + clear_mem(workspace + N, N2); + + bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2); + } + +/* +* Karatsuba Squaring Operation +*/ +void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) + { + if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) + { + switch(N) + { + case 6: + return bigint_comba_sqr6(z, x); + case 8: + return bigint_comba_sqr8(z, x); + case 9: + return bigint_comba_sqr9(z, x); + case 16: + return bigint_comba_sqr16(z, x); + case 24: + return bigint_comba_sqr24(z, x); + default: + return basecase_sqr(z, 2*N, x, N); + } + } + + const size_t N2 = N / 2; + + const word* x0 = x; + const word* x1 = x + N2; + word* z0 = z; + word* z1 = z + N; + + word* ws0 = workspace; + word* ws1 = workspace + N; + + clear_mem(workspace, 2*N); + + // See comment in karatsuba_mul + bigint_sub_abs(z0, x0, x1, N2, workspace); + karatsuba_sqr(ws0, z0, N2, ws1); + + karatsuba_sqr(z0, x0, N2, ws1); + karatsuba_sqr(z1, x1, N2, ws1); + + const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N); + word z_carry = bigint_add2_nc(z + N2, N, ws1, N); + + z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1); + bigint_add2_nc(z + N + N2, N2, &z_carry, 1); + + /* + * This is only actually required if cmp (result of bigint_sub_abs) is != 0, + * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a + * timing channel. + */ + bigint_sub2(z + N2, 2*N-N2, ws0, N); + } + +/* +* Pick a good size for the Karatsuba multiply +*/ +size_t karatsuba_size(size_t z_size, + size_t x_size, size_t x_sw, + size_t y_size, size_t y_sw) + { + if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) + return 0; + + if(((x_size == x_sw) && (x_size % 2)) || + ((y_size == y_sw) && (y_size % 2))) + return 0; + + const size_t start = (x_sw > y_sw) ? x_sw : y_sw; + const size_t end = (x_size < y_size) ? x_size : y_size; + + if(start == end) + { + if(start % 2) + return 0; + return start; + } + + for(size_t j = start; j <= end; ++j) + { + if(j % 2) + continue; + + if(2*j > z_size) + return 0; + + if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) + { + if(j % 4 == 2 && + (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size) + return j+2; + return j; + } + } + + return 0; + } + +/* +* Pick a good size for the Karatsuba squaring +*/ +size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) + { + if(x_sw == x_size) + { + if(x_sw % 2) + return 0; + return x_sw; + } + + for(size_t j = x_sw; j <= x_size; ++j) + { + if(j % 2) + continue; + + if(2*j > z_size) + return 0; + + if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size) + return j+2; + return j; + } + + return 0; + } + +template<size_t SZ> +inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, + size_t y_sw, size_t y_size, + size_t z_size) + { + return (x_sw <= SZ && x_size >= SZ && + y_sw <= SZ && y_size >= SZ && + z_size >= 2*SZ); + } + +template<size_t SZ> +inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, + size_t z_size) + { + return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ); + } + +} + +void bigint_mul(word z[], size_t z_size, + const word x[], size_t x_size, size_t x_sw, + const word y[], size_t y_size, size_t y_sw, + word workspace[], size_t ws_size) + { + clear_mem(z, z_size); + + if(x_sw == 1) + { + bigint_linmul3(z, y, y_sw, x[0]); + } + else if(y_sw == 1) + { + bigint_linmul3(z, x, x_sw, y[0]); + } + else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) + { + bigint_comba_mul4(z, x, y); + } + else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) + { + bigint_comba_mul6(z, x, y); + } + else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) + { + bigint_comba_mul8(z, x, y); + } + else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) + { + bigint_comba_mul9(z, x, y); + } + else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) + { + bigint_comba_mul16(z, x, y); + } + else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) + { + bigint_comba_mul24(z, x, y); + } + else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || + y_sw < KARATSUBA_MULTIPLY_THRESHOLD || + !workspace) + { + basecase_mul(z, z_size, x, x_sw, y, y_sw); + } + else + { + const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw); + + if(N && z_size >= 2*N && ws_size >= 2*N) + karatsuba_mul(z, x, y, N, workspace); + else + basecase_mul(z, z_size, x, x_sw, y, y_sw); + } + } + +/* +* Squaring Algorithm Dispatcher +*/ +void bigint_sqr(word z[], size_t z_size, + const word x[], size_t x_size, size_t x_sw, + word workspace[], size_t ws_size) + { + clear_mem(z, z_size); + + BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient"); + + if(x_sw == 1) + { + bigint_linmul3(z, x, x_sw, x[0]); + } + else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) + { + bigint_comba_sqr4(z, x); + } + else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) + { + bigint_comba_sqr6(z, x); + } + else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) + { + bigint_comba_sqr8(z, x); + } + else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) + { + bigint_comba_sqr9(z, x); + } + else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) + { + bigint_comba_sqr16(z, x); + } + else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) + { + bigint_comba_sqr24(z, x); + } + else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) + { + basecase_sqr(z, z_size, x, x_sw); + } + else + { + const size_t N = karatsuba_size(z_size, x_size, x_sw); + + if(N && z_size >= 2*N && ws_size >= 2*N) + karatsuba_sqr(z, x, N, workspace); + else + basecase_sqr(z, z_size, x, x_sw); + } + } + +} diff --git a/comm/third_party/botan/src/lib/math/mp/mp_madd.h b/comm/third_party/botan/src/lib/math/mp/mp_madd.h new file mode 100644 index 0000000000..531d6e6634 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_madd.h @@ -0,0 +1,146 @@ +/* +* Lowest Level MPI Algorithms +* (C) 1999-2008,2013 Jack Lloyd +* 2006 Luca Piccarreta +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_WORD_MULADD_H_ +#define BOTAN_MP_WORD_MULADD_H_ + +#include <botan/types.h> +#include <botan/mul128.h> + +namespace Botan { + +#if (BOTAN_MP_WORD_BITS == 32) + typedef uint64_t dword; + #define BOTAN_HAS_MP_DWORD + +#elif (BOTAN_MP_WORD_BITS == 64) + #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) + typedef uint128_t dword; + #define BOTAN_HAS_MP_DWORD + #else + // No native 128 bit integer type; use mul64x64_128 instead + #endif + +#else + #error BOTAN_MP_WORD_BITS must be 32 or 64 +#endif + +#if defined(BOTAN_USE_GCC_INLINE_ASM) + + #if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32) + #define BOTAN_MP_USE_X86_32_ASM + #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64) + #define BOTAN_MP_USE_X86_64_ASM + #endif + +#endif + +/* +* Word Multiply/Add +*/ +inline word word_madd2(word a, word b, word* c) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm(R"( + mull %[b] + addl %[c],%[a] + adcl $0,%[carry] + )" + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm(R"( + mulq %[b] + addq %[c],%[a] + adcq $0,%[carry] + )" + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) + : "0"(a), "1"(b), [c]"g"(*c) : "cc"); + + return a; + +#elif defined(BOTAN_HAS_MP_DWORD) + const dword s = static_cast<dword>(a) * b + *c; + *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS); + return static_cast<word>(s); +#else + static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); + + word hi = 0, lo = 0; + + mul64x64_128(a, b, &lo, &hi); + + lo += *c; + hi += (lo < *c); // carry? + + *c = hi; + return lo; +#endif + } + +/* +* Word Multiply/Add +*/ +inline word word_madd3(word a, word b, word c, word* d) + { +#if defined(BOTAN_MP_USE_X86_32_ASM) + asm(R"( + mull %[b] + + addl %[c],%[a] + adcl $0,%[carry] + + addl %[d],%[a] + adcl $0,%[carry] + )" + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + +#elif defined(BOTAN_MP_USE_X86_64_ASM) + asm(R"( + mulq %[b] + addq %[c],%[a] + adcq $0,%[carry] + addq %[d],%[a] + adcq $0,%[carry] + )" + : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) + : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); + + return a; + +#elif defined(BOTAN_HAS_MP_DWORD) + const dword s = static_cast<dword>(a) * b + c + *d; + *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS); + return static_cast<word>(s); +#else + static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); + + word hi = 0, lo = 0; + + mul64x64_128(a, b, &lo, &hi); + + lo += c; + hi += (lo < c); // carry? + + lo += *d; + hi += (lo < *d); // carry? + + *d = hi; + return lo; +#endif + } + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/mp/mp_monty.cpp b/comm/third_party/botan/src/lib/math/mp/mp_monty.cpp new file mode 100644 index 0000000000..433d3ff358 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_monty.cpp @@ -0,0 +1,133 @@ +/* +* Montgomery Reduction +* (C) 1999-2011 Jack Lloyd +* 2006 Luca Piccarreta +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/internal/mp_core.h> +#include <botan/internal/mp_monty.h> +#include <botan/internal/mp_madd.h> +#include <botan/internal/mp_asmi.h> +#include <botan/internal/ct_utils.h> +#include <botan/mem_ops.h> +#include <botan/exceptn.h> + +namespace Botan { + +namespace { + +/* +* Montgomery reduction - product scanning form +* +* https://www.iacr.org/archive/ches2005/006.pdf +* https://eprint.iacr.org/2013/882.pdf +* https://www.microsoft.com/en-us/research/wp-content/uploads/1996/01/j37acmon.pdf +*/ +void bigint_monty_redc_generic(word z[], size_t z_size, + const word p[], size_t p_size, word p_dash, + word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + + w0 = z[0]; + + ws[0] = w0 * p_dash; + + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + + w0 = w1; + w1 = w2; + w2 = 0; + + for(size_t i = 1; i != p_size; ++i) + { + for(size_t j = 0; j < i; ++j) + { + word3_muladd(&w2, &w1, &w0, ws[j], p[i-j]); + } + + word3_add(&w2, &w1, &w0, z[i]); + + ws[i] = w0 * p_dash; + + word3_muladd(&w2, &w1, &w0, ws[i], p[0]); + + w0 = w1; + w1 = w2; + w2 = 0; + } + + for(size_t i = 0; i != p_size; ++i) + { + for(size_t j = i + 1; j != p_size; ++j) + { + word3_muladd(&w2, &w1, &w0, ws[j], p[p_size + i-j]); + } + + word3_add(&w2, &w1, &w0, z[p_size+i]); + + ws[i] = w0; + w0 = w1; + w1 = w2; + w2 = 0; + } + + word3_add(&w2, &w1, &w0, z[z_size-1]); + + ws[p_size] = w0; + ws[p_size+1] = w1; + + /* + * The result might need to be reduced mod p. To avoid a timing + * channel, always perform the subtraction. If in the compution + * of x - p a borrow is required then x was already < p. + * + * x starts at ws[0] and is p_size+1 bytes long. + * x - p starts at ws[p_size+1] and is also p_size+1 bytes log + * + * Select which address to copy from indexing off of the final + * borrow. + */ + + // word borrow = bigint_sub3(ws + p_size + 1, ws, p_size + 1, p, p_size); + word borrow = 0; + for(size_t i = 0; i != p_size; ++i) + ws[p_size + 1 + i] = word_sub(ws[i], p[i], &borrow); + ws[2*p_size+1] = word_sub(ws[p_size], 0, &borrow); + + BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1); + + CT::conditional_copy_mem(borrow, z, ws, ws + (p_size + 1), (p_size + 1)); + clear_mem(z + p_size, z_size - p_size - 2); + } + +} + +void bigint_monty_redc(word z[], + const word p[], size_t p_size, word p_dash, + word ws[], size_t ws_size) + { + const size_t z_size = 2*(p_size+1); + + BOTAN_ARG_CHECK(ws_size >= z_size, "workspace too small"); + + if(p_size == 4) + bigint_monty_redc_4(z, p, p_dash, ws); + else if(p_size == 6) + bigint_monty_redc_6(z, p, p_dash, ws); + else if(p_size == 8) + bigint_monty_redc_8(z, p, p_dash, ws); + else if(p_size == 16) + bigint_monty_redc_16(z, p, p_dash, ws); + else if(p_size == 24) + bigint_monty_redc_24(z, p, p_dash, ws); + else if(p_size == 32) + bigint_monty_redc_32(z, p, p_dash, ws); + else + bigint_monty_redc_generic(z, z_size, p, p_size, p_dash, ws); + } + +} diff --git a/comm/third_party/botan/src/lib/math/mp/mp_monty.h b/comm/third_party/botan/src/lib/math/mp/mp_monty.h new file mode 100644 index 0000000000..7462272d5c --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_monty.h @@ -0,0 +1,31 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MP_MONTY_H_ +#define BOTAN_MP_MONTY_H_ + +#include <botan/types.h> + +namespace Botan { + +/* +* Each of these functions makes the following assumptions: +* +* z_size >= 2*(p_size + 1) +* ws_size >= z_size +*/ + +void bigint_monty_redc_4(word z[], const word p[], word p_dash, word ws[]); +void bigint_monty_redc_6(word z[], const word p[], word p_dash, word ws[]); +void bigint_monty_redc_8(word z[], const word p[], word p_dash, word ws[]); +void bigint_monty_redc_16(word z[], const word p[], word p_dash, word ws[]); +void bigint_monty_redc_24(word z[], const word p[], word p_dash, word ws[]); +void bigint_monty_redc_32(word z[], const word p[], word p_dash, word ws[]); + + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/mp/mp_monty_n.cpp b/comm/third_party/botan/src/lib/math/mp/mp_monty_n.cpp new file mode 100644 index 0000000000..0331d4a073 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/mp/mp_monty_n.cpp @@ -0,0 +1,2614 @@ +/* +* This file was automatically generated by ./src/scripts/monty.py on 2018-06-11 +* All manual changes will be lost. Edit the script instead. +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/internal/mp_monty.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/mp_asmi.h> +#include <botan/internal/ct_utils.h> + +namespace Botan { + +void bigint_monty_redc_4(word z[], const word p[4], word p_dash, word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + w0 = z[0]; + ws[0] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[1]); + word3_add(&w2, &w1, &w0, z[1]); + ws[1] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[1], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[2]); + word3_muladd(&w2, &w1, &w0, ws[1], p[1]); + word3_add(&w2, &w1, &w0, z[2]); + ws[2] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[2], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[3]); + word3_muladd(&w2, &w1, &w0, ws[1], p[2]); + word3_muladd(&w2, &w1, &w0, ws[2], p[1]); + word3_add(&w2, &w1, &w0, z[3]); + ws[3] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[3], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[1], p[3]); + word3_muladd(&w2, &w1, &w0, ws[2], p[2]); + word3_muladd(&w2, &w1, &w0, ws[3], p[1]); + word3_add(&w2, &w1, &w0, z[4]); + ws[0] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[2], p[3]); + word3_muladd(&w2, &w1, &w0, ws[3], p[2]); + word3_add(&w2, &w1, &w0, z[5]); + ws[1] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[3], p[3]); + word3_add(&w2, &w1, &w0, z[6]); + ws[2] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[7]); + ws[3] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[9]); + ws[4] = w0; + ws[5] = w1; + word borrow = 0; + ws[5] = word_sub(ws[0], p[0], &borrow); + ws[6] = word_sub(ws[1], p[1], &borrow); + ws[7] = word_sub(ws[2], p[2], &borrow); + ws[8] = word_sub(ws[3], p[3], &borrow); + ws[9] = word_sub(ws[4], 0, &borrow); + CT::conditional_copy_mem(borrow, z, ws, ws + 5, 5); + clear_mem(z + 4, 2*(4+1) - 4); + } + +void bigint_monty_redc_6(word z[], const word p[6], word p_dash, word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + w0 = z[0]; + ws[0] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[1]); + word3_add(&w2, &w1, &w0, z[1]); + ws[1] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[1], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[2]); + word3_muladd(&w2, &w1, &w0, ws[1], p[1]); + word3_add(&w2, &w1, &w0, z[2]); + ws[2] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[2], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[3]); + word3_muladd(&w2, &w1, &w0, ws[1], p[2]); + word3_muladd(&w2, &w1, &w0, ws[2], p[1]); + word3_add(&w2, &w1, &w0, z[3]); + ws[3] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[3], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[4]); + word3_muladd(&w2, &w1, &w0, ws[1], p[3]); + word3_muladd(&w2, &w1, &w0, ws[2], p[2]); + word3_muladd(&w2, &w1, &w0, ws[3], p[1]); + word3_add(&w2, &w1, &w0, z[4]); + ws[4] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[4], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[5]); + word3_muladd(&w2, &w1, &w0, ws[1], p[4]); + word3_muladd(&w2, &w1, &w0, ws[2], p[3]); + word3_muladd(&w2, &w1, &w0, ws[3], p[2]); + word3_muladd(&w2, &w1, &w0, ws[4], p[1]); + word3_add(&w2, &w1, &w0, z[5]); + ws[5] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[5], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[1], p[5]); + word3_muladd(&w2, &w1, &w0, ws[2], p[4]); + word3_muladd(&w2, &w1, &w0, ws[3], p[3]); + word3_muladd(&w2, &w1, &w0, ws[4], p[2]); + word3_muladd(&w2, &w1, &w0, ws[5], p[1]); + word3_add(&w2, &w1, &w0, z[6]); + ws[0] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[2], p[5]); + word3_muladd(&w2, &w1, &w0, ws[3], p[4]); + word3_muladd(&w2, &w1, &w0, ws[4], p[3]); + word3_muladd(&w2, &w1, &w0, ws[5], p[2]); + word3_add(&w2, &w1, &w0, z[7]); + ws[1] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[3], p[5]); + word3_muladd(&w2, &w1, &w0, ws[4], p[4]); + word3_muladd(&w2, &w1, &w0, ws[5], p[3]); + word3_add(&w2, &w1, &w0, z[8]); + ws[2] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[4], p[5]); + word3_muladd(&w2, &w1, &w0, ws[5], p[4]); + word3_add(&w2, &w1, &w0, z[9]); + ws[3] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[5], p[5]); + word3_add(&w2, &w1, &w0, z[10]); + ws[4] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[11]); + ws[5] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[13]); + ws[6] = w0; + ws[7] = w1; + word borrow = 0; + ws[7] = word_sub(ws[0], p[0], &borrow); + ws[8] = word_sub(ws[1], p[1], &borrow); + ws[9] = word_sub(ws[2], p[2], &borrow); + ws[10] = word_sub(ws[3], p[3], &borrow); + ws[11] = word_sub(ws[4], p[4], &borrow); + ws[12] = word_sub(ws[5], p[5], &borrow); + ws[13] = word_sub(ws[6], 0, &borrow); + CT::conditional_copy_mem(borrow, z, ws, ws + 7, 7); + clear_mem(z + 6, 2*(6+1) - 6); + } + +void bigint_monty_redc_8(word z[], const word p[8], word p_dash, word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + w0 = z[0]; + ws[0] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[1]); + word3_add(&w2, &w1, &w0, z[1]); + ws[1] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[1], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[2]); + word3_muladd(&w2, &w1, &w0, ws[1], p[1]); + word3_add(&w2, &w1, &w0, z[2]); + ws[2] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[2], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[3]); + word3_muladd(&w2, &w1, &w0, ws[1], p[2]); + word3_muladd(&w2, &w1, &w0, ws[2], p[1]); + word3_add(&w2, &w1, &w0, z[3]); + ws[3] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[3], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[4]); + word3_muladd(&w2, &w1, &w0, ws[1], p[3]); + word3_muladd(&w2, &w1, &w0, ws[2], p[2]); + word3_muladd(&w2, &w1, &w0, ws[3], p[1]); + word3_add(&w2, &w1, &w0, z[4]); + ws[4] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[4], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[5]); + word3_muladd(&w2, &w1, &w0, ws[1], p[4]); + word3_muladd(&w2, &w1, &w0, ws[2], p[3]); + word3_muladd(&w2, &w1, &w0, ws[3], p[2]); + word3_muladd(&w2, &w1, &w0, ws[4], p[1]); + word3_add(&w2, &w1, &w0, z[5]); + ws[5] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[5], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[6]); + word3_muladd(&w2, &w1, &w0, ws[1], p[5]); + word3_muladd(&w2, &w1, &w0, ws[2], p[4]); + word3_muladd(&w2, &w1, &w0, ws[3], p[3]); + word3_muladd(&w2, &w1, &w0, ws[4], p[2]); + word3_muladd(&w2, &w1, &w0, ws[5], p[1]); + word3_add(&w2, &w1, &w0, z[6]); + ws[6] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[6], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[7]); + word3_muladd(&w2, &w1, &w0, ws[1], p[6]); + word3_muladd(&w2, &w1, &w0, ws[2], p[5]); + word3_muladd(&w2, &w1, &w0, ws[3], p[4]); + word3_muladd(&w2, &w1, &w0, ws[4], p[3]); + word3_muladd(&w2, &w1, &w0, ws[5], p[2]); + word3_muladd(&w2, &w1, &w0, ws[6], p[1]); + word3_add(&w2, &w1, &w0, z[7]); + ws[7] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[7], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[1], p[7]); + word3_muladd(&w2, &w1, &w0, ws[2], p[6]); + word3_muladd(&w2, &w1, &w0, ws[3], p[5]); + word3_muladd(&w2, &w1, &w0, ws[4], p[4]); + word3_muladd(&w2, &w1, &w0, ws[5], p[3]); + word3_muladd(&w2, &w1, &w0, ws[6], p[2]); + word3_muladd(&w2, &w1, &w0, ws[7], p[1]); + word3_add(&w2, &w1, &w0, z[8]); + ws[0] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[2], p[7]); + word3_muladd(&w2, &w1, &w0, ws[3], p[6]); + word3_muladd(&w2, &w1, &w0, ws[4], p[5]); + word3_muladd(&w2, &w1, &w0, ws[5], p[4]); + word3_muladd(&w2, &w1, &w0, ws[6], p[3]); + word3_muladd(&w2, &w1, &w0, ws[7], p[2]); + word3_add(&w2, &w1, &w0, z[9]); + ws[1] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[3], p[7]); + word3_muladd(&w2, &w1, &w0, ws[4], p[6]); + word3_muladd(&w2, &w1, &w0, ws[5], p[5]); + word3_muladd(&w2, &w1, &w0, ws[6], p[4]); + word3_muladd(&w2, &w1, &w0, ws[7], p[3]); + word3_add(&w2, &w1, &w0, z[10]); + ws[2] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[4], p[7]); + word3_muladd(&w2, &w1, &w0, ws[5], p[6]); + word3_muladd(&w2, &w1, &w0, ws[6], p[5]); + word3_muladd(&w2, &w1, &w0, ws[7], p[4]); + word3_add(&w2, &w1, &w0, z[11]); + ws[3] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[5], p[7]); + word3_muladd(&w2, &w1, &w0, ws[6], p[6]); + word3_muladd(&w2, &w1, &w0, ws[7], p[5]); + word3_add(&w2, &w1, &w0, z[12]); + ws[4] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[6], p[7]); + word3_muladd(&w2, &w1, &w0, ws[7], p[6]); + word3_add(&w2, &w1, &w0, z[13]); + ws[5] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[7], p[7]); + word3_add(&w2, &w1, &w0, z[14]); + ws[6] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[15]); + ws[7] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[17]); + ws[8] = w0; + ws[9] = w1; + word borrow = 0; + ws[9] = word_sub(ws[0], p[0], &borrow); + ws[10] = word_sub(ws[1], p[1], &borrow); + ws[11] = word_sub(ws[2], p[2], &borrow); + ws[12] = word_sub(ws[3], p[3], &borrow); + ws[13] = word_sub(ws[4], p[4], &borrow); + ws[14] = word_sub(ws[5], p[5], &borrow); + ws[15] = word_sub(ws[6], p[6], &borrow); + ws[16] = word_sub(ws[7], p[7], &borrow); + ws[17] = word_sub(ws[8], 0, &borrow); + CT::conditional_copy_mem(borrow, z, ws, ws + 9, 9); + clear_mem(z + 8, 2*(8+1) - 8); + } + +void bigint_monty_redc_16(word z[], const word p[16], word p_dash, word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + w0 = z[0]; + ws[0] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[1]); + word3_add(&w2, &w1, &w0, z[1]); + ws[1] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[1], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[2]); + word3_muladd(&w2, &w1, &w0, ws[1], p[1]); + word3_add(&w2, &w1, &w0, z[2]); + ws[2] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[2], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[3]); + word3_muladd(&w2, &w1, &w0, ws[1], p[2]); + word3_muladd(&w2, &w1, &w0, ws[2], p[1]); + word3_add(&w2, &w1, &w0, z[3]); + ws[3] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[3], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[4]); + word3_muladd(&w2, &w1, &w0, ws[1], p[3]); + word3_muladd(&w2, &w1, &w0, ws[2], p[2]); + word3_muladd(&w2, &w1, &w0, ws[3], p[1]); + word3_add(&w2, &w1, &w0, z[4]); + ws[4] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[4], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[5]); + word3_muladd(&w2, &w1, &w0, ws[1], p[4]); + word3_muladd(&w2, &w1, &w0, ws[2], p[3]); + word3_muladd(&w2, &w1, &w0, ws[3], p[2]); + word3_muladd(&w2, &w1, &w0, ws[4], p[1]); + word3_add(&w2, &w1, &w0, z[5]); + ws[5] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[5], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[6]); + word3_muladd(&w2, &w1, &w0, ws[1], p[5]); + word3_muladd(&w2, &w1, &w0, ws[2], p[4]); + word3_muladd(&w2, &w1, &w0, ws[3], p[3]); + word3_muladd(&w2, &w1, &w0, ws[4], p[2]); + word3_muladd(&w2, &w1, &w0, ws[5], p[1]); + word3_add(&w2, &w1, &w0, z[6]); + ws[6] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[6], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[7]); + word3_muladd(&w2, &w1, &w0, ws[1], p[6]); + word3_muladd(&w2, &w1, &w0, ws[2], p[5]); + word3_muladd(&w2, &w1, &w0, ws[3], p[4]); + word3_muladd(&w2, &w1, &w0, ws[4], p[3]); + word3_muladd(&w2, &w1, &w0, ws[5], p[2]); + word3_muladd(&w2, &w1, &w0, ws[6], p[1]); + word3_add(&w2, &w1, &w0, z[7]); + ws[7] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[7], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[8]); + word3_muladd(&w2, &w1, &w0, ws[1], p[7]); + word3_muladd(&w2, &w1, &w0, ws[2], p[6]); + word3_muladd(&w2, &w1, &w0, ws[3], p[5]); + word3_muladd(&w2, &w1, &w0, ws[4], p[4]); + word3_muladd(&w2, &w1, &w0, ws[5], p[3]); + word3_muladd(&w2, &w1, &w0, ws[6], p[2]); + word3_muladd(&w2, &w1, &w0, ws[7], p[1]); + word3_add(&w2, &w1, &w0, z[8]); + ws[8] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[8], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[9]); + word3_muladd(&w2, &w1, &w0, ws[1], p[8]); + word3_muladd(&w2, &w1, &w0, ws[2], p[7]); + word3_muladd(&w2, &w1, &w0, ws[3], p[6]); + word3_muladd(&w2, &w1, &w0, ws[4], p[5]); + word3_muladd(&w2, &w1, &w0, ws[5], p[4]); + word3_muladd(&w2, &w1, &w0, ws[6], p[3]); + word3_muladd(&w2, &w1, &w0, ws[7], p[2]); + word3_muladd(&w2, &w1, &w0, ws[8], p[1]); + word3_add(&w2, &w1, &w0, z[9]); + ws[9] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[9], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[10]); + word3_muladd(&w2, &w1, &w0, ws[1], p[9]); + word3_muladd(&w2, &w1, &w0, ws[2], p[8]); + word3_muladd(&w2, &w1, &w0, ws[3], p[7]); + word3_muladd(&w2, &w1, &w0, ws[4], p[6]); + word3_muladd(&w2, &w1, &w0, ws[5], p[5]); + word3_muladd(&w2, &w1, &w0, ws[6], p[4]); + word3_muladd(&w2, &w1, &w0, ws[7], p[3]); + word3_muladd(&w2, &w1, &w0, ws[8], p[2]); + word3_muladd(&w2, &w1, &w0, ws[9], p[1]); + word3_add(&w2, &w1, &w0, z[10]); + ws[10] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[10], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[11]); + word3_muladd(&w2, &w1, &w0, ws[1], p[10]); + word3_muladd(&w2, &w1, &w0, ws[2], p[9]); + word3_muladd(&w2, &w1, &w0, ws[3], p[8]); + word3_muladd(&w2, &w1, &w0, ws[4], p[7]); + word3_muladd(&w2, &w1, &w0, ws[5], p[6]); + word3_muladd(&w2, &w1, &w0, ws[6], p[5]); + word3_muladd(&w2, &w1, &w0, ws[7], p[4]); + word3_muladd(&w2, &w1, &w0, ws[8], p[3]); + word3_muladd(&w2, &w1, &w0, ws[9], p[2]); + word3_muladd(&w2, &w1, &w0, ws[10], p[1]); + word3_add(&w2, &w1, &w0, z[11]); + ws[11] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[11], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[12]); + word3_muladd(&w2, &w1, &w0, ws[1], p[11]); + word3_muladd(&w2, &w1, &w0, ws[2], p[10]); + word3_muladd(&w2, &w1, &w0, ws[3], p[9]); + word3_muladd(&w2, &w1, &w0, ws[4], p[8]); + word3_muladd(&w2, &w1, &w0, ws[5], p[7]); + word3_muladd(&w2, &w1, &w0, ws[6], p[6]); + word3_muladd(&w2, &w1, &w0, ws[7], p[5]); + word3_muladd(&w2, &w1, &w0, ws[8], p[4]); + word3_muladd(&w2, &w1, &w0, ws[9], p[3]); + word3_muladd(&w2, &w1, &w0, ws[10], p[2]); + word3_muladd(&w2, &w1, &w0, ws[11], p[1]); + word3_add(&w2, &w1, &w0, z[12]); + ws[12] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[12], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[13]); + word3_muladd(&w2, &w1, &w0, ws[1], p[12]); + word3_muladd(&w2, &w1, &w0, ws[2], p[11]); + word3_muladd(&w2, &w1, &w0, ws[3], p[10]); + word3_muladd(&w2, &w1, &w0, ws[4], p[9]); + word3_muladd(&w2, &w1, &w0, ws[5], p[8]); + word3_muladd(&w2, &w1, &w0, ws[6], p[7]); + word3_muladd(&w2, &w1, &w0, ws[7], p[6]); + word3_muladd(&w2, &w1, &w0, ws[8], p[5]); + word3_muladd(&w2, &w1, &w0, ws[9], p[4]); + word3_muladd(&w2, &w1, &w0, ws[10], p[3]); + word3_muladd(&w2, &w1, &w0, ws[11], p[2]); + word3_muladd(&w2, &w1, &w0, ws[12], p[1]); + word3_add(&w2, &w1, &w0, z[13]); + ws[13] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[13], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[14]); + word3_muladd(&w2, &w1, &w0, ws[1], p[13]); + word3_muladd(&w2, &w1, &w0, ws[2], p[12]); + word3_muladd(&w2, &w1, &w0, ws[3], p[11]); + word3_muladd(&w2, &w1, &w0, ws[4], p[10]); + word3_muladd(&w2, &w1, &w0, ws[5], p[9]); + word3_muladd(&w2, &w1, &w0, ws[6], p[8]); + word3_muladd(&w2, &w1, &w0, ws[7], p[7]); + word3_muladd(&w2, &w1, &w0, ws[8], p[6]); + word3_muladd(&w2, &w1, &w0, ws[9], p[5]); + word3_muladd(&w2, &w1, &w0, ws[10], p[4]); + word3_muladd(&w2, &w1, &w0, ws[11], p[3]); + word3_muladd(&w2, &w1, &w0, ws[12], p[2]); + word3_muladd(&w2, &w1, &w0, ws[13], p[1]); + word3_add(&w2, &w1, &w0, z[14]); + ws[14] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[14], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[15]); + word3_muladd(&w2, &w1, &w0, ws[1], p[14]); + word3_muladd(&w2, &w1, &w0, ws[2], p[13]); + word3_muladd(&w2, &w1, &w0, ws[3], p[12]); + word3_muladd(&w2, &w1, &w0, ws[4], p[11]); + word3_muladd(&w2, &w1, &w0, ws[5], p[10]); + word3_muladd(&w2, &w1, &w0, ws[6], p[9]); + word3_muladd(&w2, &w1, &w0, ws[7], p[8]); + word3_muladd(&w2, &w1, &w0, ws[8], p[7]); + word3_muladd(&w2, &w1, &w0, ws[9], p[6]); + word3_muladd(&w2, &w1, &w0, ws[10], p[5]); + word3_muladd(&w2, &w1, &w0, ws[11], p[4]); + word3_muladd(&w2, &w1, &w0, ws[12], p[3]); + word3_muladd(&w2, &w1, &w0, ws[13], p[2]); + word3_muladd(&w2, &w1, &w0, ws[14], p[1]); + word3_add(&w2, &w1, &w0, z[15]); + ws[15] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[15], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[1], p[15]); + word3_muladd(&w2, &w1, &w0, ws[2], p[14]); + word3_muladd(&w2, &w1, &w0, ws[3], p[13]); + word3_muladd(&w2, &w1, &w0, ws[4], p[12]); + word3_muladd(&w2, &w1, &w0, ws[5], p[11]); + word3_muladd(&w2, &w1, &w0, ws[6], p[10]); + word3_muladd(&w2, &w1, &w0, ws[7], p[9]); + word3_muladd(&w2, &w1, &w0, ws[8], p[8]); + word3_muladd(&w2, &w1, &w0, ws[9], p[7]); + word3_muladd(&w2, &w1, &w0, ws[10], p[6]); + word3_muladd(&w2, &w1, &w0, ws[11], p[5]); + word3_muladd(&w2, &w1, &w0, ws[12], p[4]); + word3_muladd(&w2, &w1, &w0, ws[13], p[3]); + word3_muladd(&w2, &w1, &w0, ws[14], p[2]); + word3_muladd(&w2, &w1, &w0, ws[15], p[1]); + word3_add(&w2, &w1, &w0, z[16]); + ws[0] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[2], p[15]); + word3_muladd(&w2, &w1, &w0, ws[3], p[14]); + word3_muladd(&w2, &w1, &w0, ws[4], p[13]); + word3_muladd(&w2, &w1, &w0, ws[5], p[12]); + word3_muladd(&w2, &w1, &w0, ws[6], p[11]); + word3_muladd(&w2, &w1, &w0, ws[7], p[10]); + word3_muladd(&w2, &w1, &w0, ws[8], p[9]); + word3_muladd(&w2, &w1, &w0, ws[9], p[8]); + word3_muladd(&w2, &w1, &w0, ws[10], p[7]); + word3_muladd(&w2, &w1, &w0, ws[11], p[6]); + word3_muladd(&w2, &w1, &w0, ws[12], p[5]); + word3_muladd(&w2, &w1, &w0, ws[13], p[4]); + word3_muladd(&w2, &w1, &w0, ws[14], p[3]); + word3_muladd(&w2, &w1, &w0, ws[15], p[2]); + word3_add(&w2, &w1, &w0, z[17]); + ws[1] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[3], p[15]); + word3_muladd(&w2, &w1, &w0, ws[4], p[14]); + word3_muladd(&w2, &w1, &w0, ws[5], p[13]); + word3_muladd(&w2, &w1, &w0, ws[6], p[12]); + word3_muladd(&w2, &w1, &w0, ws[7], p[11]); + word3_muladd(&w2, &w1, &w0, ws[8], p[10]); + word3_muladd(&w2, &w1, &w0, ws[9], p[9]); + word3_muladd(&w2, &w1, &w0, ws[10], p[8]); + word3_muladd(&w2, &w1, &w0, ws[11], p[7]); + word3_muladd(&w2, &w1, &w0, ws[12], p[6]); + word3_muladd(&w2, &w1, &w0, ws[13], p[5]); + word3_muladd(&w2, &w1, &w0, ws[14], p[4]); + word3_muladd(&w2, &w1, &w0, ws[15], p[3]); + word3_add(&w2, &w1, &w0, z[18]); + ws[2] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[4], p[15]); + word3_muladd(&w2, &w1, &w0, ws[5], p[14]); + word3_muladd(&w2, &w1, &w0, ws[6], p[13]); + word3_muladd(&w2, &w1, &w0, ws[7], p[12]); + word3_muladd(&w2, &w1, &w0, ws[8], p[11]); + word3_muladd(&w2, &w1, &w0, ws[9], p[10]); + word3_muladd(&w2, &w1, &w0, ws[10], p[9]); + word3_muladd(&w2, &w1, &w0, ws[11], p[8]); + word3_muladd(&w2, &w1, &w0, ws[12], p[7]); + word3_muladd(&w2, &w1, &w0, ws[13], p[6]); + word3_muladd(&w2, &w1, &w0, ws[14], p[5]); + word3_muladd(&w2, &w1, &w0, ws[15], p[4]); + word3_add(&w2, &w1, &w0, z[19]); + ws[3] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[5], p[15]); + word3_muladd(&w2, &w1, &w0, ws[6], p[14]); + word3_muladd(&w2, &w1, &w0, ws[7], p[13]); + word3_muladd(&w2, &w1, &w0, ws[8], p[12]); + word3_muladd(&w2, &w1, &w0, ws[9], p[11]); + word3_muladd(&w2, &w1, &w0, ws[10], p[10]); + word3_muladd(&w2, &w1, &w0, ws[11], p[9]); + word3_muladd(&w2, &w1, &w0, ws[12], p[8]); + word3_muladd(&w2, &w1, &w0, ws[13], p[7]); + word3_muladd(&w2, &w1, &w0, ws[14], p[6]); + word3_muladd(&w2, &w1, &w0, ws[15], p[5]); + word3_add(&w2, &w1, &w0, z[20]); + ws[4] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[6], p[15]); + word3_muladd(&w2, &w1, &w0, ws[7], p[14]); + word3_muladd(&w2, &w1, &w0, ws[8], p[13]); + word3_muladd(&w2, &w1, &w0, ws[9], p[12]); + word3_muladd(&w2, &w1, &w0, ws[10], p[11]); + word3_muladd(&w2, &w1, &w0, ws[11], p[10]); + word3_muladd(&w2, &w1, &w0, ws[12], p[9]); + word3_muladd(&w2, &w1, &w0, ws[13], p[8]); + word3_muladd(&w2, &w1, &w0, ws[14], p[7]); + word3_muladd(&w2, &w1, &w0, ws[15], p[6]); + word3_add(&w2, &w1, &w0, z[21]); + ws[5] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[7], p[15]); + word3_muladd(&w2, &w1, &w0, ws[8], p[14]); + word3_muladd(&w2, &w1, &w0, ws[9], p[13]); + word3_muladd(&w2, &w1, &w0, ws[10], p[12]); + word3_muladd(&w2, &w1, &w0, ws[11], p[11]); + word3_muladd(&w2, &w1, &w0, ws[12], p[10]); + word3_muladd(&w2, &w1, &w0, ws[13], p[9]); + word3_muladd(&w2, &w1, &w0, ws[14], p[8]); + word3_muladd(&w2, &w1, &w0, ws[15], p[7]); + word3_add(&w2, &w1, &w0, z[22]); + ws[6] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[8], p[15]); + word3_muladd(&w2, &w1, &w0, ws[9], p[14]); + word3_muladd(&w2, &w1, &w0, ws[10], p[13]); + word3_muladd(&w2, &w1, &w0, ws[11], p[12]); + word3_muladd(&w2, &w1, &w0, ws[12], p[11]); + word3_muladd(&w2, &w1, &w0, ws[13], p[10]); + word3_muladd(&w2, &w1, &w0, ws[14], p[9]); + word3_muladd(&w2, &w1, &w0, ws[15], p[8]); + word3_add(&w2, &w1, &w0, z[23]); + ws[7] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[9], p[15]); + word3_muladd(&w2, &w1, &w0, ws[10], p[14]); + word3_muladd(&w2, &w1, &w0, ws[11], p[13]); + word3_muladd(&w2, &w1, &w0, ws[12], p[12]); + word3_muladd(&w2, &w1, &w0, ws[13], p[11]); + word3_muladd(&w2, &w1, &w0, ws[14], p[10]); + word3_muladd(&w2, &w1, &w0, ws[15], p[9]); + word3_add(&w2, &w1, &w0, z[24]); + ws[8] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[10], p[15]); + word3_muladd(&w2, &w1, &w0, ws[11], p[14]); + word3_muladd(&w2, &w1, &w0, ws[12], p[13]); + word3_muladd(&w2, &w1, &w0, ws[13], p[12]); + word3_muladd(&w2, &w1, &w0, ws[14], p[11]); + word3_muladd(&w2, &w1, &w0, ws[15], p[10]); + word3_add(&w2, &w1, &w0, z[25]); + ws[9] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[11], p[15]); + word3_muladd(&w2, &w1, &w0, ws[12], p[14]); + word3_muladd(&w2, &w1, &w0, ws[13], p[13]); + word3_muladd(&w2, &w1, &w0, ws[14], p[12]); + word3_muladd(&w2, &w1, &w0, ws[15], p[11]); + word3_add(&w2, &w1, &w0, z[26]); + ws[10] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[12], p[15]); + word3_muladd(&w2, &w1, &w0, ws[13], p[14]); + word3_muladd(&w2, &w1, &w0, ws[14], p[13]); + word3_muladd(&w2, &w1, &w0, ws[15], p[12]); + word3_add(&w2, &w1, &w0, z[27]); + ws[11] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[13], p[15]); + word3_muladd(&w2, &w1, &w0, ws[14], p[14]); + word3_muladd(&w2, &w1, &w0, ws[15], p[13]); + word3_add(&w2, &w1, &w0, z[28]); + ws[12] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[14], p[15]); + word3_muladd(&w2, &w1, &w0, ws[15], p[14]); + word3_add(&w2, &w1, &w0, z[29]); + ws[13] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[15], p[15]); + word3_add(&w2, &w1, &w0, z[30]); + ws[14] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[31]); + ws[15] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[33]); + ws[16] = w0; + ws[17] = w1; + word borrow = bigint_sub3(ws + 16 + 1, ws, 16 + 1, p, 16); + CT::conditional_copy_mem(borrow, z, ws, ws + 17, 17); + clear_mem(z + 16, 2*(16+1) - 16); + } + +void bigint_monty_redc_24(word z[], const word p[24], word p_dash, word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + w0 = z[0]; + ws[0] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[1]); + word3_add(&w2, &w1, &w0, z[1]); + ws[1] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[1], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[2]); + word3_muladd(&w2, &w1, &w0, ws[1], p[1]); + word3_add(&w2, &w1, &w0, z[2]); + ws[2] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[2], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[3]); + word3_muladd(&w2, &w1, &w0, ws[1], p[2]); + word3_muladd(&w2, &w1, &w0, ws[2], p[1]); + word3_add(&w2, &w1, &w0, z[3]); + ws[3] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[3], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[4]); + word3_muladd(&w2, &w1, &w0, ws[1], p[3]); + word3_muladd(&w2, &w1, &w0, ws[2], p[2]); + word3_muladd(&w2, &w1, &w0, ws[3], p[1]); + word3_add(&w2, &w1, &w0, z[4]); + ws[4] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[4], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[5]); + word3_muladd(&w2, &w1, &w0, ws[1], p[4]); + word3_muladd(&w2, &w1, &w0, ws[2], p[3]); + word3_muladd(&w2, &w1, &w0, ws[3], p[2]); + word3_muladd(&w2, &w1, &w0, ws[4], p[1]); + word3_add(&w2, &w1, &w0, z[5]); + ws[5] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[5], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[6]); + word3_muladd(&w2, &w1, &w0, ws[1], p[5]); + word3_muladd(&w2, &w1, &w0, ws[2], p[4]); + word3_muladd(&w2, &w1, &w0, ws[3], p[3]); + word3_muladd(&w2, &w1, &w0, ws[4], p[2]); + word3_muladd(&w2, &w1, &w0, ws[5], p[1]); + word3_add(&w2, &w1, &w0, z[6]); + ws[6] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[6], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[7]); + word3_muladd(&w2, &w1, &w0, ws[1], p[6]); + word3_muladd(&w2, &w1, &w0, ws[2], p[5]); + word3_muladd(&w2, &w1, &w0, ws[3], p[4]); + word3_muladd(&w2, &w1, &w0, ws[4], p[3]); + word3_muladd(&w2, &w1, &w0, ws[5], p[2]); + word3_muladd(&w2, &w1, &w0, ws[6], p[1]); + word3_add(&w2, &w1, &w0, z[7]); + ws[7] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[7], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[8]); + word3_muladd(&w2, &w1, &w0, ws[1], p[7]); + word3_muladd(&w2, &w1, &w0, ws[2], p[6]); + word3_muladd(&w2, &w1, &w0, ws[3], p[5]); + word3_muladd(&w2, &w1, &w0, ws[4], p[4]); + word3_muladd(&w2, &w1, &w0, ws[5], p[3]); + word3_muladd(&w2, &w1, &w0, ws[6], p[2]); + word3_muladd(&w2, &w1, &w0, ws[7], p[1]); + word3_add(&w2, &w1, &w0, z[8]); + ws[8] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[8], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[9]); + word3_muladd(&w2, &w1, &w0, ws[1], p[8]); + word3_muladd(&w2, &w1, &w0, ws[2], p[7]); + word3_muladd(&w2, &w1, &w0, ws[3], p[6]); + word3_muladd(&w2, &w1, &w0, ws[4], p[5]); + word3_muladd(&w2, &w1, &w0, ws[5], p[4]); + word3_muladd(&w2, &w1, &w0, ws[6], p[3]); + word3_muladd(&w2, &w1, &w0, ws[7], p[2]); + word3_muladd(&w2, &w1, &w0, ws[8], p[1]); + word3_add(&w2, &w1, &w0, z[9]); + ws[9] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[9], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[10]); + word3_muladd(&w2, &w1, &w0, ws[1], p[9]); + word3_muladd(&w2, &w1, &w0, ws[2], p[8]); + word3_muladd(&w2, &w1, &w0, ws[3], p[7]); + word3_muladd(&w2, &w1, &w0, ws[4], p[6]); + word3_muladd(&w2, &w1, &w0, ws[5], p[5]); + word3_muladd(&w2, &w1, &w0, ws[6], p[4]); + word3_muladd(&w2, &w1, &w0, ws[7], p[3]); + word3_muladd(&w2, &w1, &w0, ws[8], p[2]); + word3_muladd(&w2, &w1, &w0, ws[9], p[1]); + word3_add(&w2, &w1, &w0, z[10]); + ws[10] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[10], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[11]); + word3_muladd(&w2, &w1, &w0, ws[1], p[10]); + word3_muladd(&w2, &w1, &w0, ws[2], p[9]); + word3_muladd(&w2, &w1, &w0, ws[3], p[8]); + word3_muladd(&w2, &w1, &w0, ws[4], p[7]); + word3_muladd(&w2, &w1, &w0, ws[5], p[6]); + word3_muladd(&w2, &w1, &w0, ws[6], p[5]); + word3_muladd(&w2, &w1, &w0, ws[7], p[4]); + word3_muladd(&w2, &w1, &w0, ws[8], p[3]); + word3_muladd(&w2, &w1, &w0, ws[9], p[2]); + word3_muladd(&w2, &w1, &w0, ws[10], p[1]); + word3_add(&w2, &w1, &w0, z[11]); + ws[11] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[11], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[12]); + word3_muladd(&w2, &w1, &w0, ws[1], p[11]); + word3_muladd(&w2, &w1, &w0, ws[2], p[10]); + word3_muladd(&w2, &w1, &w0, ws[3], p[9]); + word3_muladd(&w2, &w1, &w0, ws[4], p[8]); + word3_muladd(&w2, &w1, &w0, ws[5], p[7]); + word3_muladd(&w2, &w1, &w0, ws[6], p[6]); + word3_muladd(&w2, &w1, &w0, ws[7], p[5]); + word3_muladd(&w2, &w1, &w0, ws[8], p[4]); + word3_muladd(&w2, &w1, &w0, ws[9], p[3]); + word3_muladd(&w2, &w1, &w0, ws[10], p[2]); + word3_muladd(&w2, &w1, &w0, ws[11], p[1]); + word3_add(&w2, &w1, &w0, z[12]); + ws[12] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[12], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[13]); + word3_muladd(&w2, &w1, &w0, ws[1], p[12]); + word3_muladd(&w2, &w1, &w0, ws[2], p[11]); + word3_muladd(&w2, &w1, &w0, ws[3], p[10]); + word3_muladd(&w2, &w1, &w0, ws[4], p[9]); + word3_muladd(&w2, &w1, &w0, ws[5], p[8]); + word3_muladd(&w2, &w1, &w0, ws[6], p[7]); + word3_muladd(&w2, &w1, &w0, ws[7], p[6]); + word3_muladd(&w2, &w1, &w0, ws[8], p[5]); + word3_muladd(&w2, &w1, &w0, ws[9], p[4]); + word3_muladd(&w2, &w1, &w0, ws[10], p[3]); + word3_muladd(&w2, &w1, &w0, ws[11], p[2]); + word3_muladd(&w2, &w1, &w0, ws[12], p[1]); + word3_add(&w2, &w1, &w0, z[13]); + ws[13] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[13], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[14]); + word3_muladd(&w2, &w1, &w0, ws[1], p[13]); + word3_muladd(&w2, &w1, &w0, ws[2], p[12]); + word3_muladd(&w2, &w1, &w0, ws[3], p[11]); + word3_muladd(&w2, &w1, &w0, ws[4], p[10]); + word3_muladd(&w2, &w1, &w0, ws[5], p[9]); + word3_muladd(&w2, &w1, &w0, ws[6], p[8]); + word3_muladd(&w2, &w1, &w0, ws[7], p[7]); + word3_muladd(&w2, &w1, &w0, ws[8], p[6]); + word3_muladd(&w2, &w1, &w0, ws[9], p[5]); + word3_muladd(&w2, &w1, &w0, ws[10], p[4]); + word3_muladd(&w2, &w1, &w0, ws[11], p[3]); + word3_muladd(&w2, &w1, &w0, ws[12], p[2]); + word3_muladd(&w2, &w1, &w0, ws[13], p[1]); + word3_add(&w2, &w1, &w0, z[14]); + ws[14] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[14], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[15]); + word3_muladd(&w2, &w1, &w0, ws[1], p[14]); + word3_muladd(&w2, &w1, &w0, ws[2], p[13]); + word3_muladd(&w2, &w1, &w0, ws[3], p[12]); + word3_muladd(&w2, &w1, &w0, ws[4], p[11]); + word3_muladd(&w2, &w1, &w0, ws[5], p[10]); + word3_muladd(&w2, &w1, &w0, ws[6], p[9]); + word3_muladd(&w2, &w1, &w0, ws[7], p[8]); + word3_muladd(&w2, &w1, &w0, ws[8], p[7]); + word3_muladd(&w2, &w1, &w0, ws[9], p[6]); + word3_muladd(&w2, &w1, &w0, ws[10], p[5]); + word3_muladd(&w2, &w1, &w0, ws[11], p[4]); + word3_muladd(&w2, &w1, &w0, ws[12], p[3]); + word3_muladd(&w2, &w1, &w0, ws[13], p[2]); + word3_muladd(&w2, &w1, &w0, ws[14], p[1]); + word3_add(&w2, &w1, &w0, z[15]); + ws[15] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[15], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[16]); + word3_muladd(&w2, &w1, &w0, ws[1], p[15]); + word3_muladd(&w2, &w1, &w0, ws[2], p[14]); + word3_muladd(&w2, &w1, &w0, ws[3], p[13]); + word3_muladd(&w2, &w1, &w0, ws[4], p[12]); + word3_muladd(&w2, &w1, &w0, ws[5], p[11]); + word3_muladd(&w2, &w1, &w0, ws[6], p[10]); + word3_muladd(&w2, &w1, &w0, ws[7], p[9]); + word3_muladd(&w2, &w1, &w0, ws[8], p[8]); + word3_muladd(&w2, &w1, &w0, ws[9], p[7]); + word3_muladd(&w2, &w1, &w0, ws[10], p[6]); + word3_muladd(&w2, &w1, &w0, ws[11], p[5]); + word3_muladd(&w2, &w1, &w0, ws[12], p[4]); + word3_muladd(&w2, &w1, &w0, ws[13], p[3]); + word3_muladd(&w2, &w1, &w0, ws[14], p[2]); + word3_muladd(&w2, &w1, &w0, ws[15], p[1]); + word3_add(&w2, &w1, &w0, z[16]); + ws[16] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[16], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[17]); + word3_muladd(&w2, &w1, &w0, ws[1], p[16]); + word3_muladd(&w2, &w1, &w0, ws[2], p[15]); + word3_muladd(&w2, &w1, &w0, ws[3], p[14]); + word3_muladd(&w2, &w1, &w0, ws[4], p[13]); + word3_muladd(&w2, &w1, &w0, ws[5], p[12]); + word3_muladd(&w2, &w1, &w0, ws[6], p[11]); + word3_muladd(&w2, &w1, &w0, ws[7], p[10]); + word3_muladd(&w2, &w1, &w0, ws[8], p[9]); + word3_muladd(&w2, &w1, &w0, ws[9], p[8]); + word3_muladd(&w2, &w1, &w0, ws[10], p[7]); + word3_muladd(&w2, &w1, &w0, ws[11], p[6]); + word3_muladd(&w2, &w1, &w0, ws[12], p[5]); + word3_muladd(&w2, &w1, &w0, ws[13], p[4]); + word3_muladd(&w2, &w1, &w0, ws[14], p[3]); + word3_muladd(&w2, &w1, &w0, ws[15], p[2]); + word3_muladd(&w2, &w1, &w0, ws[16], p[1]); + word3_add(&w2, &w1, &w0, z[17]); + ws[17] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[17], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[18]); + word3_muladd(&w2, &w1, &w0, ws[1], p[17]); + word3_muladd(&w2, &w1, &w0, ws[2], p[16]); + word3_muladd(&w2, &w1, &w0, ws[3], p[15]); + word3_muladd(&w2, &w1, &w0, ws[4], p[14]); + word3_muladd(&w2, &w1, &w0, ws[5], p[13]); + word3_muladd(&w2, &w1, &w0, ws[6], p[12]); + word3_muladd(&w2, &w1, &w0, ws[7], p[11]); + word3_muladd(&w2, &w1, &w0, ws[8], p[10]); + word3_muladd(&w2, &w1, &w0, ws[9], p[9]); + word3_muladd(&w2, &w1, &w0, ws[10], p[8]); + word3_muladd(&w2, &w1, &w0, ws[11], p[7]); + word3_muladd(&w2, &w1, &w0, ws[12], p[6]); + word3_muladd(&w2, &w1, &w0, ws[13], p[5]); + word3_muladd(&w2, &w1, &w0, ws[14], p[4]); + word3_muladd(&w2, &w1, &w0, ws[15], p[3]); + word3_muladd(&w2, &w1, &w0, ws[16], p[2]); + word3_muladd(&w2, &w1, &w0, ws[17], p[1]); + word3_add(&w2, &w1, &w0, z[18]); + ws[18] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[18], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[19]); + word3_muladd(&w2, &w1, &w0, ws[1], p[18]); + word3_muladd(&w2, &w1, &w0, ws[2], p[17]); + word3_muladd(&w2, &w1, &w0, ws[3], p[16]); + word3_muladd(&w2, &w1, &w0, ws[4], p[15]); + word3_muladd(&w2, &w1, &w0, ws[5], p[14]); + word3_muladd(&w2, &w1, &w0, ws[6], p[13]); + word3_muladd(&w2, &w1, &w0, ws[7], p[12]); + word3_muladd(&w2, &w1, &w0, ws[8], p[11]); + word3_muladd(&w2, &w1, &w0, ws[9], p[10]); + word3_muladd(&w2, &w1, &w0, ws[10], p[9]); + word3_muladd(&w2, &w1, &w0, ws[11], p[8]); + word3_muladd(&w2, &w1, &w0, ws[12], p[7]); + word3_muladd(&w2, &w1, &w0, ws[13], p[6]); + word3_muladd(&w2, &w1, &w0, ws[14], p[5]); + word3_muladd(&w2, &w1, &w0, ws[15], p[4]); + word3_muladd(&w2, &w1, &w0, ws[16], p[3]); + word3_muladd(&w2, &w1, &w0, ws[17], p[2]); + word3_muladd(&w2, &w1, &w0, ws[18], p[1]); + word3_add(&w2, &w1, &w0, z[19]); + ws[19] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[19], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[20]); + word3_muladd(&w2, &w1, &w0, ws[1], p[19]); + word3_muladd(&w2, &w1, &w0, ws[2], p[18]); + word3_muladd(&w2, &w1, &w0, ws[3], p[17]); + word3_muladd(&w2, &w1, &w0, ws[4], p[16]); + word3_muladd(&w2, &w1, &w0, ws[5], p[15]); + word3_muladd(&w2, &w1, &w0, ws[6], p[14]); + word3_muladd(&w2, &w1, &w0, ws[7], p[13]); + word3_muladd(&w2, &w1, &w0, ws[8], p[12]); + word3_muladd(&w2, &w1, &w0, ws[9], p[11]); + word3_muladd(&w2, &w1, &w0, ws[10], p[10]); + word3_muladd(&w2, &w1, &w0, ws[11], p[9]); + word3_muladd(&w2, &w1, &w0, ws[12], p[8]); + word3_muladd(&w2, &w1, &w0, ws[13], p[7]); + word3_muladd(&w2, &w1, &w0, ws[14], p[6]); + word3_muladd(&w2, &w1, &w0, ws[15], p[5]); + word3_muladd(&w2, &w1, &w0, ws[16], p[4]); + word3_muladd(&w2, &w1, &w0, ws[17], p[3]); + word3_muladd(&w2, &w1, &w0, ws[18], p[2]); + word3_muladd(&w2, &w1, &w0, ws[19], p[1]); + word3_add(&w2, &w1, &w0, z[20]); + ws[20] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[20], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[21]); + word3_muladd(&w2, &w1, &w0, ws[1], p[20]); + word3_muladd(&w2, &w1, &w0, ws[2], p[19]); + word3_muladd(&w2, &w1, &w0, ws[3], p[18]); + word3_muladd(&w2, &w1, &w0, ws[4], p[17]); + word3_muladd(&w2, &w1, &w0, ws[5], p[16]); + word3_muladd(&w2, &w1, &w0, ws[6], p[15]); + word3_muladd(&w2, &w1, &w0, ws[7], p[14]); + word3_muladd(&w2, &w1, &w0, ws[8], p[13]); + word3_muladd(&w2, &w1, &w0, ws[9], p[12]); + word3_muladd(&w2, &w1, &w0, ws[10], p[11]); + word3_muladd(&w2, &w1, &w0, ws[11], p[10]); + word3_muladd(&w2, &w1, &w0, ws[12], p[9]); + word3_muladd(&w2, &w1, &w0, ws[13], p[8]); + word3_muladd(&w2, &w1, &w0, ws[14], p[7]); + word3_muladd(&w2, &w1, &w0, ws[15], p[6]); + word3_muladd(&w2, &w1, &w0, ws[16], p[5]); + word3_muladd(&w2, &w1, &w0, ws[17], p[4]); + word3_muladd(&w2, &w1, &w0, ws[18], p[3]); + word3_muladd(&w2, &w1, &w0, ws[19], p[2]); + word3_muladd(&w2, &w1, &w0, ws[20], p[1]); + word3_add(&w2, &w1, &w0, z[21]); + ws[21] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[21], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[22]); + word3_muladd(&w2, &w1, &w0, ws[1], p[21]); + word3_muladd(&w2, &w1, &w0, ws[2], p[20]); + word3_muladd(&w2, &w1, &w0, ws[3], p[19]); + word3_muladd(&w2, &w1, &w0, ws[4], p[18]); + word3_muladd(&w2, &w1, &w0, ws[5], p[17]); + word3_muladd(&w2, &w1, &w0, ws[6], p[16]); + word3_muladd(&w2, &w1, &w0, ws[7], p[15]); + word3_muladd(&w2, &w1, &w0, ws[8], p[14]); + word3_muladd(&w2, &w1, &w0, ws[9], p[13]); + word3_muladd(&w2, &w1, &w0, ws[10], p[12]); + word3_muladd(&w2, &w1, &w0, ws[11], p[11]); + word3_muladd(&w2, &w1, &w0, ws[12], p[10]); + word3_muladd(&w2, &w1, &w0, ws[13], p[9]); + word3_muladd(&w2, &w1, &w0, ws[14], p[8]); + word3_muladd(&w2, &w1, &w0, ws[15], p[7]); + word3_muladd(&w2, &w1, &w0, ws[16], p[6]); + word3_muladd(&w2, &w1, &w0, ws[17], p[5]); + word3_muladd(&w2, &w1, &w0, ws[18], p[4]); + word3_muladd(&w2, &w1, &w0, ws[19], p[3]); + word3_muladd(&w2, &w1, &w0, ws[20], p[2]); + word3_muladd(&w2, &w1, &w0, ws[21], p[1]); + word3_add(&w2, &w1, &w0, z[22]); + ws[22] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[22], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[23]); + word3_muladd(&w2, &w1, &w0, ws[1], p[22]); + word3_muladd(&w2, &w1, &w0, ws[2], p[21]); + word3_muladd(&w2, &w1, &w0, ws[3], p[20]); + word3_muladd(&w2, &w1, &w0, ws[4], p[19]); + word3_muladd(&w2, &w1, &w0, ws[5], p[18]); + word3_muladd(&w2, &w1, &w0, ws[6], p[17]); + word3_muladd(&w2, &w1, &w0, ws[7], p[16]); + word3_muladd(&w2, &w1, &w0, ws[8], p[15]); + word3_muladd(&w2, &w1, &w0, ws[9], p[14]); + word3_muladd(&w2, &w1, &w0, ws[10], p[13]); + word3_muladd(&w2, &w1, &w0, ws[11], p[12]); + word3_muladd(&w2, &w1, &w0, ws[12], p[11]); + word3_muladd(&w2, &w1, &w0, ws[13], p[10]); + word3_muladd(&w2, &w1, &w0, ws[14], p[9]); + word3_muladd(&w2, &w1, &w0, ws[15], p[8]); + word3_muladd(&w2, &w1, &w0, ws[16], p[7]); + word3_muladd(&w2, &w1, &w0, ws[17], p[6]); + word3_muladd(&w2, &w1, &w0, ws[18], p[5]); + word3_muladd(&w2, &w1, &w0, ws[19], p[4]); + word3_muladd(&w2, &w1, &w0, ws[20], p[3]); + word3_muladd(&w2, &w1, &w0, ws[21], p[2]); + word3_muladd(&w2, &w1, &w0, ws[22], p[1]); + word3_add(&w2, &w1, &w0, z[23]); + ws[23] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[23], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[1], p[23]); + word3_muladd(&w2, &w1, &w0, ws[2], p[22]); + word3_muladd(&w2, &w1, &w0, ws[3], p[21]); + word3_muladd(&w2, &w1, &w0, ws[4], p[20]); + word3_muladd(&w2, &w1, &w0, ws[5], p[19]); + word3_muladd(&w2, &w1, &w0, ws[6], p[18]); + word3_muladd(&w2, &w1, &w0, ws[7], p[17]); + word3_muladd(&w2, &w1, &w0, ws[8], p[16]); + word3_muladd(&w2, &w1, &w0, ws[9], p[15]); + word3_muladd(&w2, &w1, &w0, ws[10], p[14]); + word3_muladd(&w2, &w1, &w0, ws[11], p[13]); + word3_muladd(&w2, &w1, &w0, ws[12], p[12]); + word3_muladd(&w2, &w1, &w0, ws[13], p[11]); + word3_muladd(&w2, &w1, &w0, ws[14], p[10]); + word3_muladd(&w2, &w1, &w0, ws[15], p[9]); + word3_muladd(&w2, &w1, &w0, ws[16], p[8]); + word3_muladd(&w2, &w1, &w0, ws[17], p[7]); + word3_muladd(&w2, &w1, &w0, ws[18], p[6]); + word3_muladd(&w2, &w1, &w0, ws[19], p[5]); + word3_muladd(&w2, &w1, &w0, ws[20], p[4]); + word3_muladd(&w2, &w1, &w0, ws[21], p[3]); + word3_muladd(&w2, &w1, &w0, ws[22], p[2]); + word3_muladd(&w2, &w1, &w0, ws[23], p[1]); + word3_add(&w2, &w1, &w0, z[24]); + ws[0] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[2], p[23]); + word3_muladd(&w2, &w1, &w0, ws[3], p[22]); + word3_muladd(&w2, &w1, &w0, ws[4], p[21]); + word3_muladd(&w2, &w1, &w0, ws[5], p[20]); + word3_muladd(&w2, &w1, &w0, ws[6], p[19]); + word3_muladd(&w2, &w1, &w0, ws[7], p[18]); + word3_muladd(&w2, &w1, &w0, ws[8], p[17]); + word3_muladd(&w2, &w1, &w0, ws[9], p[16]); + word3_muladd(&w2, &w1, &w0, ws[10], p[15]); + word3_muladd(&w2, &w1, &w0, ws[11], p[14]); + word3_muladd(&w2, &w1, &w0, ws[12], p[13]); + word3_muladd(&w2, &w1, &w0, ws[13], p[12]); + word3_muladd(&w2, &w1, &w0, ws[14], p[11]); + word3_muladd(&w2, &w1, &w0, ws[15], p[10]); + word3_muladd(&w2, &w1, &w0, ws[16], p[9]); + word3_muladd(&w2, &w1, &w0, ws[17], p[8]); + word3_muladd(&w2, &w1, &w0, ws[18], p[7]); + word3_muladd(&w2, &w1, &w0, ws[19], p[6]); + word3_muladd(&w2, &w1, &w0, ws[20], p[5]); + word3_muladd(&w2, &w1, &w0, ws[21], p[4]); + word3_muladd(&w2, &w1, &w0, ws[22], p[3]); + word3_muladd(&w2, &w1, &w0, ws[23], p[2]); + word3_add(&w2, &w1, &w0, z[25]); + ws[1] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[3], p[23]); + word3_muladd(&w2, &w1, &w0, ws[4], p[22]); + word3_muladd(&w2, &w1, &w0, ws[5], p[21]); + word3_muladd(&w2, &w1, &w0, ws[6], p[20]); + word3_muladd(&w2, &w1, &w0, ws[7], p[19]); + word3_muladd(&w2, &w1, &w0, ws[8], p[18]); + word3_muladd(&w2, &w1, &w0, ws[9], p[17]); + word3_muladd(&w2, &w1, &w0, ws[10], p[16]); + word3_muladd(&w2, &w1, &w0, ws[11], p[15]); + word3_muladd(&w2, &w1, &w0, ws[12], p[14]); + word3_muladd(&w2, &w1, &w0, ws[13], p[13]); + word3_muladd(&w2, &w1, &w0, ws[14], p[12]); + word3_muladd(&w2, &w1, &w0, ws[15], p[11]); + word3_muladd(&w2, &w1, &w0, ws[16], p[10]); + word3_muladd(&w2, &w1, &w0, ws[17], p[9]); + word3_muladd(&w2, &w1, &w0, ws[18], p[8]); + word3_muladd(&w2, &w1, &w0, ws[19], p[7]); + word3_muladd(&w2, &w1, &w0, ws[20], p[6]); + word3_muladd(&w2, &w1, &w0, ws[21], p[5]); + word3_muladd(&w2, &w1, &w0, ws[22], p[4]); + word3_muladd(&w2, &w1, &w0, ws[23], p[3]); + word3_add(&w2, &w1, &w0, z[26]); + ws[2] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[4], p[23]); + word3_muladd(&w2, &w1, &w0, ws[5], p[22]); + word3_muladd(&w2, &w1, &w0, ws[6], p[21]); + word3_muladd(&w2, &w1, &w0, ws[7], p[20]); + word3_muladd(&w2, &w1, &w0, ws[8], p[19]); + word3_muladd(&w2, &w1, &w0, ws[9], p[18]); + word3_muladd(&w2, &w1, &w0, ws[10], p[17]); + word3_muladd(&w2, &w1, &w0, ws[11], p[16]); + word3_muladd(&w2, &w1, &w0, ws[12], p[15]); + word3_muladd(&w2, &w1, &w0, ws[13], p[14]); + word3_muladd(&w2, &w1, &w0, ws[14], p[13]); + word3_muladd(&w2, &w1, &w0, ws[15], p[12]); + word3_muladd(&w2, &w1, &w0, ws[16], p[11]); + word3_muladd(&w2, &w1, &w0, ws[17], p[10]); + word3_muladd(&w2, &w1, &w0, ws[18], p[9]); + word3_muladd(&w2, &w1, &w0, ws[19], p[8]); + word3_muladd(&w2, &w1, &w0, ws[20], p[7]); + word3_muladd(&w2, &w1, &w0, ws[21], p[6]); + word3_muladd(&w2, &w1, &w0, ws[22], p[5]); + word3_muladd(&w2, &w1, &w0, ws[23], p[4]); + word3_add(&w2, &w1, &w0, z[27]); + ws[3] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[5], p[23]); + word3_muladd(&w2, &w1, &w0, ws[6], p[22]); + word3_muladd(&w2, &w1, &w0, ws[7], p[21]); + word3_muladd(&w2, &w1, &w0, ws[8], p[20]); + word3_muladd(&w2, &w1, &w0, ws[9], p[19]); + word3_muladd(&w2, &w1, &w0, ws[10], p[18]); + word3_muladd(&w2, &w1, &w0, ws[11], p[17]); + word3_muladd(&w2, &w1, &w0, ws[12], p[16]); + word3_muladd(&w2, &w1, &w0, ws[13], p[15]); + word3_muladd(&w2, &w1, &w0, ws[14], p[14]); + word3_muladd(&w2, &w1, &w0, ws[15], p[13]); + word3_muladd(&w2, &w1, &w0, ws[16], p[12]); + word3_muladd(&w2, &w1, &w0, ws[17], p[11]); + word3_muladd(&w2, &w1, &w0, ws[18], p[10]); + word3_muladd(&w2, &w1, &w0, ws[19], p[9]); + word3_muladd(&w2, &w1, &w0, ws[20], p[8]); + word3_muladd(&w2, &w1, &w0, ws[21], p[7]); + word3_muladd(&w2, &w1, &w0, ws[22], p[6]); + word3_muladd(&w2, &w1, &w0, ws[23], p[5]); + word3_add(&w2, &w1, &w0, z[28]); + ws[4] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[6], p[23]); + word3_muladd(&w2, &w1, &w0, ws[7], p[22]); + word3_muladd(&w2, &w1, &w0, ws[8], p[21]); + word3_muladd(&w2, &w1, &w0, ws[9], p[20]); + word3_muladd(&w2, &w1, &w0, ws[10], p[19]); + word3_muladd(&w2, &w1, &w0, ws[11], p[18]); + word3_muladd(&w2, &w1, &w0, ws[12], p[17]); + word3_muladd(&w2, &w1, &w0, ws[13], p[16]); + word3_muladd(&w2, &w1, &w0, ws[14], p[15]); + word3_muladd(&w2, &w1, &w0, ws[15], p[14]); + word3_muladd(&w2, &w1, &w0, ws[16], p[13]); + word3_muladd(&w2, &w1, &w0, ws[17], p[12]); + word3_muladd(&w2, &w1, &w0, ws[18], p[11]); + word3_muladd(&w2, &w1, &w0, ws[19], p[10]); + word3_muladd(&w2, &w1, &w0, ws[20], p[9]); + word3_muladd(&w2, &w1, &w0, ws[21], p[8]); + word3_muladd(&w2, &w1, &w0, ws[22], p[7]); + word3_muladd(&w2, &w1, &w0, ws[23], p[6]); + word3_add(&w2, &w1, &w0, z[29]); + ws[5] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[7], p[23]); + word3_muladd(&w2, &w1, &w0, ws[8], p[22]); + word3_muladd(&w2, &w1, &w0, ws[9], p[21]); + word3_muladd(&w2, &w1, &w0, ws[10], p[20]); + word3_muladd(&w2, &w1, &w0, ws[11], p[19]); + word3_muladd(&w2, &w1, &w0, ws[12], p[18]); + word3_muladd(&w2, &w1, &w0, ws[13], p[17]); + word3_muladd(&w2, &w1, &w0, ws[14], p[16]); + word3_muladd(&w2, &w1, &w0, ws[15], p[15]); + word3_muladd(&w2, &w1, &w0, ws[16], p[14]); + word3_muladd(&w2, &w1, &w0, ws[17], p[13]); + word3_muladd(&w2, &w1, &w0, ws[18], p[12]); + word3_muladd(&w2, &w1, &w0, ws[19], p[11]); + word3_muladd(&w2, &w1, &w0, ws[20], p[10]); + word3_muladd(&w2, &w1, &w0, ws[21], p[9]); + word3_muladd(&w2, &w1, &w0, ws[22], p[8]); + word3_muladd(&w2, &w1, &w0, ws[23], p[7]); + word3_add(&w2, &w1, &w0, z[30]); + ws[6] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[8], p[23]); + word3_muladd(&w2, &w1, &w0, ws[9], p[22]); + word3_muladd(&w2, &w1, &w0, ws[10], p[21]); + word3_muladd(&w2, &w1, &w0, ws[11], p[20]); + word3_muladd(&w2, &w1, &w0, ws[12], p[19]); + word3_muladd(&w2, &w1, &w0, ws[13], p[18]); + word3_muladd(&w2, &w1, &w0, ws[14], p[17]); + word3_muladd(&w2, &w1, &w0, ws[15], p[16]); + word3_muladd(&w2, &w1, &w0, ws[16], p[15]); + word3_muladd(&w2, &w1, &w0, ws[17], p[14]); + word3_muladd(&w2, &w1, &w0, ws[18], p[13]); + word3_muladd(&w2, &w1, &w0, ws[19], p[12]); + word3_muladd(&w2, &w1, &w0, ws[20], p[11]); + word3_muladd(&w2, &w1, &w0, ws[21], p[10]); + word3_muladd(&w2, &w1, &w0, ws[22], p[9]); + word3_muladd(&w2, &w1, &w0, ws[23], p[8]); + word3_add(&w2, &w1, &w0, z[31]); + ws[7] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[9], p[23]); + word3_muladd(&w2, &w1, &w0, ws[10], p[22]); + word3_muladd(&w2, &w1, &w0, ws[11], p[21]); + word3_muladd(&w2, &w1, &w0, ws[12], p[20]); + word3_muladd(&w2, &w1, &w0, ws[13], p[19]); + word3_muladd(&w2, &w1, &w0, ws[14], p[18]); + word3_muladd(&w2, &w1, &w0, ws[15], p[17]); + word3_muladd(&w2, &w1, &w0, ws[16], p[16]); + word3_muladd(&w2, &w1, &w0, ws[17], p[15]); + word3_muladd(&w2, &w1, &w0, ws[18], p[14]); + word3_muladd(&w2, &w1, &w0, ws[19], p[13]); + word3_muladd(&w2, &w1, &w0, ws[20], p[12]); + word3_muladd(&w2, &w1, &w0, ws[21], p[11]); + word3_muladd(&w2, &w1, &w0, ws[22], p[10]); + word3_muladd(&w2, &w1, &w0, ws[23], p[9]); + word3_add(&w2, &w1, &w0, z[32]); + ws[8] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[10], p[23]); + word3_muladd(&w2, &w1, &w0, ws[11], p[22]); + word3_muladd(&w2, &w1, &w0, ws[12], p[21]); + word3_muladd(&w2, &w1, &w0, ws[13], p[20]); + word3_muladd(&w2, &w1, &w0, ws[14], p[19]); + word3_muladd(&w2, &w1, &w0, ws[15], p[18]); + word3_muladd(&w2, &w1, &w0, ws[16], p[17]); + word3_muladd(&w2, &w1, &w0, ws[17], p[16]); + word3_muladd(&w2, &w1, &w0, ws[18], p[15]); + word3_muladd(&w2, &w1, &w0, ws[19], p[14]); + word3_muladd(&w2, &w1, &w0, ws[20], p[13]); + word3_muladd(&w2, &w1, &w0, ws[21], p[12]); + word3_muladd(&w2, &w1, &w0, ws[22], p[11]); + word3_muladd(&w2, &w1, &w0, ws[23], p[10]); + word3_add(&w2, &w1, &w0, z[33]); + ws[9] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[11], p[23]); + word3_muladd(&w2, &w1, &w0, ws[12], p[22]); + word3_muladd(&w2, &w1, &w0, ws[13], p[21]); + word3_muladd(&w2, &w1, &w0, ws[14], p[20]); + word3_muladd(&w2, &w1, &w0, ws[15], p[19]); + word3_muladd(&w2, &w1, &w0, ws[16], p[18]); + word3_muladd(&w2, &w1, &w0, ws[17], p[17]); + word3_muladd(&w2, &w1, &w0, ws[18], p[16]); + word3_muladd(&w2, &w1, &w0, ws[19], p[15]); + word3_muladd(&w2, &w1, &w0, ws[20], p[14]); + word3_muladd(&w2, &w1, &w0, ws[21], p[13]); + word3_muladd(&w2, &w1, &w0, ws[22], p[12]); + word3_muladd(&w2, &w1, &w0, ws[23], p[11]); + word3_add(&w2, &w1, &w0, z[34]); + ws[10] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[12], p[23]); + word3_muladd(&w2, &w1, &w0, ws[13], p[22]); + word3_muladd(&w2, &w1, &w0, ws[14], p[21]); + word3_muladd(&w2, &w1, &w0, ws[15], p[20]); + word3_muladd(&w2, &w1, &w0, ws[16], p[19]); + word3_muladd(&w2, &w1, &w0, ws[17], p[18]); + word3_muladd(&w2, &w1, &w0, ws[18], p[17]); + word3_muladd(&w2, &w1, &w0, ws[19], p[16]); + word3_muladd(&w2, &w1, &w0, ws[20], p[15]); + word3_muladd(&w2, &w1, &w0, ws[21], p[14]); + word3_muladd(&w2, &w1, &w0, ws[22], p[13]); + word3_muladd(&w2, &w1, &w0, ws[23], p[12]); + word3_add(&w2, &w1, &w0, z[35]); + ws[11] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[13], p[23]); + word3_muladd(&w2, &w1, &w0, ws[14], p[22]); + word3_muladd(&w2, &w1, &w0, ws[15], p[21]); + word3_muladd(&w2, &w1, &w0, ws[16], p[20]); + word3_muladd(&w2, &w1, &w0, ws[17], p[19]); + word3_muladd(&w2, &w1, &w0, ws[18], p[18]); + word3_muladd(&w2, &w1, &w0, ws[19], p[17]); + word3_muladd(&w2, &w1, &w0, ws[20], p[16]); + word3_muladd(&w2, &w1, &w0, ws[21], p[15]); + word3_muladd(&w2, &w1, &w0, ws[22], p[14]); + word3_muladd(&w2, &w1, &w0, ws[23], p[13]); + word3_add(&w2, &w1, &w0, z[36]); + ws[12] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[14], p[23]); + word3_muladd(&w2, &w1, &w0, ws[15], p[22]); + word3_muladd(&w2, &w1, &w0, ws[16], p[21]); + word3_muladd(&w2, &w1, &w0, ws[17], p[20]); + word3_muladd(&w2, &w1, &w0, ws[18], p[19]); + word3_muladd(&w2, &w1, &w0, ws[19], p[18]); + word3_muladd(&w2, &w1, &w0, ws[20], p[17]); + word3_muladd(&w2, &w1, &w0, ws[21], p[16]); + word3_muladd(&w2, &w1, &w0, ws[22], p[15]); + word3_muladd(&w2, &w1, &w0, ws[23], p[14]); + word3_add(&w2, &w1, &w0, z[37]); + ws[13] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[15], p[23]); + word3_muladd(&w2, &w1, &w0, ws[16], p[22]); + word3_muladd(&w2, &w1, &w0, ws[17], p[21]); + word3_muladd(&w2, &w1, &w0, ws[18], p[20]); + word3_muladd(&w2, &w1, &w0, ws[19], p[19]); + word3_muladd(&w2, &w1, &w0, ws[20], p[18]); + word3_muladd(&w2, &w1, &w0, ws[21], p[17]); + word3_muladd(&w2, &w1, &w0, ws[22], p[16]); + word3_muladd(&w2, &w1, &w0, ws[23], p[15]); + word3_add(&w2, &w1, &w0, z[38]); + ws[14] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[16], p[23]); + word3_muladd(&w2, &w1, &w0, ws[17], p[22]); + word3_muladd(&w2, &w1, &w0, ws[18], p[21]); + word3_muladd(&w2, &w1, &w0, ws[19], p[20]); + word3_muladd(&w2, &w1, &w0, ws[20], p[19]); + word3_muladd(&w2, &w1, &w0, ws[21], p[18]); + word3_muladd(&w2, &w1, &w0, ws[22], p[17]); + word3_muladd(&w2, &w1, &w0, ws[23], p[16]); + word3_add(&w2, &w1, &w0, z[39]); + ws[15] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[17], p[23]); + word3_muladd(&w2, &w1, &w0, ws[18], p[22]); + word3_muladd(&w2, &w1, &w0, ws[19], p[21]); + word3_muladd(&w2, &w1, &w0, ws[20], p[20]); + word3_muladd(&w2, &w1, &w0, ws[21], p[19]); + word3_muladd(&w2, &w1, &w0, ws[22], p[18]); + word3_muladd(&w2, &w1, &w0, ws[23], p[17]); + word3_add(&w2, &w1, &w0, z[40]); + ws[16] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[18], p[23]); + word3_muladd(&w2, &w1, &w0, ws[19], p[22]); + word3_muladd(&w2, &w1, &w0, ws[20], p[21]); + word3_muladd(&w2, &w1, &w0, ws[21], p[20]); + word3_muladd(&w2, &w1, &w0, ws[22], p[19]); + word3_muladd(&w2, &w1, &w0, ws[23], p[18]); + word3_add(&w2, &w1, &w0, z[41]); + ws[17] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[19], p[23]); + word3_muladd(&w2, &w1, &w0, ws[20], p[22]); + word3_muladd(&w2, &w1, &w0, ws[21], p[21]); + word3_muladd(&w2, &w1, &w0, ws[22], p[20]); + word3_muladd(&w2, &w1, &w0, ws[23], p[19]); + word3_add(&w2, &w1, &w0, z[42]); + ws[18] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[20], p[23]); + word3_muladd(&w2, &w1, &w0, ws[21], p[22]); + word3_muladd(&w2, &w1, &w0, ws[22], p[21]); + word3_muladd(&w2, &w1, &w0, ws[23], p[20]); + word3_add(&w2, &w1, &w0, z[43]); + ws[19] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[21], p[23]); + word3_muladd(&w2, &w1, &w0, ws[22], p[22]); + word3_muladd(&w2, &w1, &w0, ws[23], p[21]); + word3_add(&w2, &w1, &w0, z[44]); + ws[20] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[22], p[23]); + word3_muladd(&w2, &w1, &w0, ws[23], p[22]); + word3_add(&w2, &w1, &w0, z[45]); + ws[21] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[23], p[23]); + word3_add(&w2, &w1, &w0, z[46]); + ws[22] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[47]); + ws[23] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[49]); + ws[24] = w0; + ws[25] = w1; + word borrow = bigint_sub3(ws + 24 + 1, ws, 24 + 1, p, 24); + CT::conditional_copy_mem(borrow, z, ws, ws + 25, 25); + clear_mem(z + 24, 2*(24+1) - 24); + } + +void bigint_monty_redc_32(word z[], const word p[32], word p_dash, word ws[]) + { + word w2 = 0, w1 = 0, w0 = 0; + w0 = z[0]; + ws[0] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[0], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[1]); + word3_add(&w2, &w1, &w0, z[1]); + ws[1] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[1], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[2]); + word3_muladd(&w2, &w1, &w0, ws[1], p[1]); + word3_add(&w2, &w1, &w0, z[2]); + ws[2] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[2], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[3]); + word3_muladd(&w2, &w1, &w0, ws[1], p[2]); + word3_muladd(&w2, &w1, &w0, ws[2], p[1]); + word3_add(&w2, &w1, &w0, z[3]); + ws[3] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[3], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[4]); + word3_muladd(&w2, &w1, &w0, ws[1], p[3]); + word3_muladd(&w2, &w1, &w0, ws[2], p[2]); + word3_muladd(&w2, &w1, &w0, ws[3], p[1]); + word3_add(&w2, &w1, &w0, z[4]); + ws[4] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[4], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[5]); + word3_muladd(&w2, &w1, &w0, ws[1], p[4]); + word3_muladd(&w2, &w1, &w0, ws[2], p[3]); + word3_muladd(&w2, &w1, &w0, ws[3], p[2]); + word3_muladd(&w2, &w1, &w0, ws[4], p[1]); + word3_add(&w2, &w1, &w0, z[5]); + ws[5] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[5], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[6]); + word3_muladd(&w2, &w1, &w0, ws[1], p[5]); + word3_muladd(&w2, &w1, &w0, ws[2], p[4]); + word3_muladd(&w2, &w1, &w0, ws[3], p[3]); + word3_muladd(&w2, &w1, &w0, ws[4], p[2]); + word3_muladd(&w2, &w1, &w0, ws[5], p[1]); + word3_add(&w2, &w1, &w0, z[6]); + ws[6] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[6], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[7]); + word3_muladd(&w2, &w1, &w0, ws[1], p[6]); + word3_muladd(&w2, &w1, &w0, ws[2], p[5]); + word3_muladd(&w2, &w1, &w0, ws[3], p[4]); + word3_muladd(&w2, &w1, &w0, ws[4], p[3]); + word3_muladd(&w2, &w1, &w0, ws[5], p[2]); + word3_muladd(&w2, &w1, &w0, ws[6], p[1]); + word3_add(&w2, &w1, &w0, z[7]); + ws[7] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[7], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[8]); + word3_muladd(&w2, &w1, &w0, ws[1], p[7]); + word3_muladd(&w2, &w1, &w0, ws[2], p[6]); + word3_muladd(&w2, &w1, &w0, ws[3], p[5]); + word3_muladd(&w2, &w1, &w0, ws[4], p[4]); + word3_muladd(&w2, &w1, &w0, ws[5], p[3]); + word3_muladd(&w2, &w1, &w0, ws[6], p[2]); + word3_muladd(&w2, &w1, &w0, ws[7], p[1]); + word3_add(&w2, &w1, &w0, z[8]); + ws[8] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[8], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[9]); + word3_muladd(&w2, &w1, &w0, ws[1], p[8]); + word3_muladd(&w2, &w1, &w0, ws[2], p[7]); + word3_muladd(&w2, &w1, &w0, ws[3], p[6]); + word3_muladd(&w2, &w1, &w0, ws[4], p[5]); + word3_muladd(&w2, &w1, &w0, ws[5], p[4]); + word3_muladd(&w2, &w1, &w0, ws[6], p[3]); + word3_muladd(&w2, &w1, &w0, ws[7], p[2]); + word3_muladd(&w2, &w1, &w0, ws[8], p[1]); + word3_add(&w2, &w1, &w0, z[9]); + ws[9] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[9], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[10]); + word3_muladd(&w2, &w1, &w0, ws[1], p[9]); + word3_muladd(&w2, &w1, &w0, ws[2], p[8]); + word3_muladd(&w2, &w1, &w0, ws[3], p[7]); + word3_muladd(&w2, &w1, &w0, ws[4], p[6]); + word3_muladd(&w2, &w1, &w0, ws[5], p[5]); + word3_muladd(&w2, &w1, &w0, ws[6], p[4]); + word3_muladd(&w2, &w1, &w0, ws[7], p[3]); + word3_muladd(&w2, &w1, &w0, ws[8], p[2]); + word3_muladd(&w2, &w1, &w0, ws[9], p[1]); + word3_add(&w2, &w1, &w0, z[10]); + ws[10] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[10], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[11]); + word3_muladd(&w2, &w1, &w0, ws[1], p[10]); + word3_muladd(&w2, &w1, &w0, ws[2], p[9]); + word3_muladd(&w2, &w1, &w0, ws[3], p[8]); + word3_muladd(&w2, &w1, &w0, ws[4], p[7]); + word3_muladd(&w2, &w1, &w0, ws[5], p[6]); + word3_muladd(&w2, &w1, &w0, ws[6], p[5]); + word3_muladd(&w2, &w1, &w0, ws[7], p[4]); + word3_muladd(&w2, &w1, &w0, ws[8], p[3]); + word3_muladd(&w2, &w1, &w0, ws[9], p[2]); + word3_muladd(&w2, &w1, &w0, ws[10], p[1]); + word3_add(&w2, &w1, &w0, z[11]); + ws[11] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[11], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[12]); + word3_muladd(&w2, &w1, &w0, ws[1], p[11]); + word3_muladd(&w2, &w1, &w0, ws[2], p[10]); + word3_muladd(&w2, &w1, &w0, ws[3], p[9]); + word3_muladd(&w2, &w1, &w0, ws[4], p[8]); + word3_muladd(&w2, &w1, &w0, ws[5], p[7]); + word3_muladd(&w2, &w1, &w0, ws[6], p[6]); + word3_muladd(&w2, &w1, &w0, ws[7], p[5]); + word3_muladd(&w2, &w1, &w0, ws[8], p[4]); + word3_muladd(&w2, &w1, &w0, ws[9], p[3]); + word3_muladd(&w2, &w1, &w0, ws[10], p[2]); + word3_muladd(&w2, &w1, &w0, ws[11], p[1]); + word3_add(&w2, &w1, &w0, z[12]); + ws[12] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[12], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[13]); + word3_muladd(&w2, &w1, &w0, ws[1], p[12]); + word3_muladd(&w2, &w1, &w0, ws[2], p[11]); + word3_muladd(&w2, &w1, &w0, ws[3], p[10]); + word3_muladd(&w2, &w1, &w0, ws[4], p[9]); + word3_muladd(&w2, &w1, &w0, ws[5], p[8]); + word3_muladd(&w2, &w1, &w0, ws[6], p[7]); + word3_muladd(&w2, &w1, &w0, ws[7], p[6]); + word3_muladd(&w2, &w1, &w0, ws[8], p[5]); + word3_muladd(&w2, &w1, &w0, ws[9], p[4]); + word3_muladd(&w2, &w1, &w0, ws[10], p[3]); + word3_muladd(&w2, &w1, &w0, ws[11], p[2]); + word3_muladd(&w2, &w1, &w0, ws[12], p[1]); + word3_add(&w2, &w1, &w0, z[13]); + ws[13] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[13], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[14]); + word3_muladd(&w2, &w1, &w0, ws[1], p[13]); + word3_muladd(&w2, &w1, &w0, ws[2], p[12]); + word3_muladd(&w2, &w1, &w0, ws[3], p[11]); + word3_muladd(&w2, &w1, &w0, ws[4], p[10]); + word3_muladd(&w2, &w1, &w0, ws[5], p[9]); + word3_muladd(&w2, &w1, &w0, ws[6], p[8]); + word3_muladd(&w2, &w1, &w0, ws[7], p[7]); + word3_muladd(&w2, &w1, &w0, ws[8], p[6]); + word3_muladd(&w2, &w1, &w0, ws[9], p[5]); + word3_muladd(&w2, &w1, &w0, ws[10], p[4]); + word3_muladd(&w2, &w1, &w0, ws[11], p[3]); + word3_muladd(&w2, &w1, &w0, ws[12], p[2]); + word3_muladd(&w2, &w1, &w0, ws[13], p[1]); + word3_add(&w2, &w1, &w0, z[14]); + ws[14] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[14], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[15]); + word3_muladd(&w2, &w1, &w0, ws[1], p[14]); + word3_muladd(&w2, &w1, &w0, ws[2], p[13]); + word3_muladd(&w2, &w1, &w0, ws[3], p[12]); + word3_muladd(&w2, &w1, &w0, ws[4], p[11]); + word3_muladd(&w2, &w1, &w0, ws[5], p[10]); + word3_muladd(&w2, &w1, &w0, ws[6], p[9]); + word3_muladd(&w2, &w1, &w0, ws[7], p[8]); + word3_muladd(&w2, &w1, &w0, ws[8], p[7]); + word3_muladd(&w2, &w1, &w0, ws[9], p[6]); + word3_muladd(&w2, &w1, &w0, ws[10], p[5]); + word3_muladd(&w2, &w1, &w0, ws[11], p[4]); + word3_muladd(&w2, &w1, &w0, ws[12], p[3]); + word3_muladd(&w2, &w1, &w0, ws[13], p[2]); + word3_muladd(&w2, &w1, &w0, ws[14], p[1]); + word3_add(&w2, &w1, &w0, z[15]); + ws[15] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[15], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[16]); + word3_muladd(&w2, &w1, &w0, ws[1], p[15]); + word3_muladd(&w2, &w1, &w0, ws[2], p[14]); + word3_muladd(&w2, &w1, &w0, ws[3], p[13]); + word3_muladd(&w2, &w1, &w0, ws[4], p[12]); + word3_muladd(&w2, &w1, &w0, ws[5], p[11]); + word3_muladd(&w2, &w1, &w0, ws[6], p[10]); + word3_muladd(&w2, &w1, &w0, ws[7], p[9]); + word3_muladd(&w2, &w1, &w0, ws[8], p[8]); + word3_muladd(&w2, &w1, &w0, ws[9], p[7]); + word3_muladd(&w2, &w1, &w0, ws[10], p[6]); + word3_muladd(&w2, &w1, &w0, ws[11], p[5]); + word3_muladd(&w2, &w1, &w0, ws[12], p[4]); + word3_muladd(&w2, &w1, &w0, ws[13], p[3]); + word3_muladd(&w2, &w1, &w0, ws[14], p[2]); + word3_muladd(&w2, &w1, &w0, ws[15], p[1]); + word3_add(&w2, &w1, &w0, z[16]); + ws[16] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[16], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[17]); + word3_muladd(&w2, &w1, &w0, ws[1], p[16]); + word3_muladd(&w2, &w1, &w0, ws[2], p[15]); + word3_muladd(&w2, &w1, &w0, ws[3], p[14]); + word3_muladd(&w2, &w1, &w0, ws[4], p[13]); + word3_muladd(&w2, &w1, &w0, ws[5], p[12]); + word3_muladd(&w2, &w1, &w0, ws[6], p[11]); + word3_muladd(&w2, &w1, &w0, ws[7], p[10]); + word3_muladd(&w2, &w1, &w0, ws[8], p[9]); + word3_muladd(&w2, &w1, &w0, ws[9], p[8]); + word3_muladd(&w2, &w1, &w0, ws[10], p[7]); + word3_muladd(&w2, &w1, &w0, ws[11], p[6]); + word3_muladd(&w2, &w1, &w0, ws[12], p[5]); + word3_muladd(&w2, &w1, &w0, ws[13], p[4]); + word3_muladd(&w2, &w1, &w0, ws[14], p[3]); + word3_muladd(&w2, &w1, &w0, ws[15], p[2]); + word3_muladd(&w2, &w1, &w0, ws[16], p[1]); + word3_add(&w2, &w1, &w0, z[17]); + ws[17] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[17], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[18]); + word3_muladd(&w2, &w1, &w0, ws[1], p[17]); + word3_muladd(&w2, &w1, &w0, ws[2], p[16]); + word3_muladd(&w2, &w1, &w0, ws[3], p[15]); + word3_muladd(&w2, &w1, &w0, ws[4], p[14]); + word3_muladd(&w2, &w1, &w0, ws[5], p[13]); + word3_muladd(&w2, &w1, &w0, ws[6], p[12]); + word3_muladd(&w2, &w1, &w0, ws[7], p[11]); + word3_muladd(&w2, &w1, &w0, ws[8], p[10]); + word3_muladd(&w2, &w1, &w0, ws[9], p[9]); + word3_muladd(&w2, &w1, &w0, ws[10], p[8]); + word3_muladd(&w2, &w1, &w0, ws[11], p[7]); + word3_muladd(&w2, &w1, &w0, ws[12], p[6]); + word3_muladd(&w2, &w1, &w0, ws[13], p[5]); + word3_muladd(&w2, &w1, &w0, ws[14], p[4]); + word3_muladd(&w2, &w1, &w0, ws[15], p[3]); + word3_muladd(&w2, &w1, &w0, ws[16], p[2]); + word3_muladd(&w2, &w1, &w0, ws[17], p[1]); + word3_add(&w2, &w1, &w0, z[18]); + ws[18] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[18], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[19]); + word3_muladd(&w2, &w1, &w0, ws[1], p[18]); + word3_muladd(&w2, &w1, &w0, ws[2], p[17]); + word3_muladd(&w2, &w1, &w0, ws[3], p[16]); + word3_muladd(&w2, &w1, &w0, ws[4], p[15]); + word3_muladd(&w2, &w1, &w0, ws[5], p[14]); + word3_muladd(&w2, &w1, &w0, ws[6], p[13]); + word3_muladd(&w2, &w1, &w0, ws[7], p[12]); + word3_muladd(&w2, &w1, &w0, ws[8], p[11]); + word3_muladd(&w2, &w1, &w0, ws[9], p[10]); + word3_muladd(&w2, &w1, &w0, ws[10], p[9]); + word3_muladd(&w2, &w1, &w0, ws[11], p[8]); + word3_muladd(&w2, &w1, &w0, ws[12], p[7]); + word3_muladd(&w2, &w1, &w0, ws[13], p[6]); + word3_muladd(&w2, &w1, &w0, ws[14], p[5]); + word3_muladd(&w2, &w1, &w0, ws[15], p[4]); + word3_muladd(&w2, &w1, &w0, ws[16], p[3]); + word3_muladd(&w2, &w1, &w0, ws[17], p[2]); + word3_muladd(&w2, &w1, &w0, ws[18], p[1]); + word3_add(&w2, &w1, &w0, z[19]); + ws[19] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[19], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[20]); + word3_muladd(&w2, &w1, &w0, ws[1], p[19]); + word3_muladd(&w2, &w1, &w0, ws[2], p[18]); + word3_muladd(&w2, &w1, &w0, ws[3], p[17]); + word3_muladd(&w2, &w1, &w0, ws[4], p[16]); + word3_muladd(&w2, &w1, &w0, ws[5], p[15]); + word3_muladd(&w2, &w1, &w0, ws[6], p[14]); + word3_muladd(&w2, &w1, &w0, ws[7], p[13]); + word3_muladd(&w2, &w1, &w0, ws[8], p[12]); + word3_muladd(&w2, &w1, &w0, ws[9], p[11]); + word3_muladd(&w2, &w1, &w0, ws[10], p[10]); + word3_muladd(&w2, &w1, &w0, ws[11], p[9]); + word3_muladd(&w2, &w1, &w0, ws[12], p[8]); + word3_muladd(&w2, &w1, &w0, ws[13], p[7]); + word3_muladd(&w2, &w1, &w0, ws[14], p[6]); + word3_muladd(&w2, &w1, &w0, ws[15], p[5]); + word3_muladd(&w2, &w1, &w0, ws[16], p[4]); + word3_muladd(&w2, &w1, &w0, ws[17], p[3]); + word3_muladd(&w2, &w1, &w0, ws[18], p[2]); + word3_muladd(&w2, &w1, &w0, ws[19], p[1]); + word3_add(&w2, &w1, &w0, z[20]); + ws[20] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[20], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[21]); + word3_muladd(&w2, &w1, &w0, ws[1], p[20]); + word3_muladd(&w2, &w1, &w0, ws[2], p[19]); + word3_muladd(&w2, &w1, &w0, ws[3], p[18]); + word3_muladd(&w2, &w1, &w0, ws[4], p[17]); + word3_muladd(&w2, &w1, &w0, ws[5], p[16]); + word3_muladd(&w2, &w1, &w0, ws[6], p[15]); + word3_muladd(&w2, &w1, &w0, ws[7], p[14]); + word3_muladd(&w2, &w1, &w0, ws[8], p[13]); + word3_muladd(&w2, &w1, &w0, ws[9], p[12]); + word3_muladd(&w2, &w1, &w0, ws[10], p[11]); + word3_muladd(&w2, &w1, &w0, ws[11], p[10]); + word3_muladd(&w2, &w1, &w0, ws[12], p[9]); + word3_muladd(&w2, &w1, &w0, ws[13], p[8]); + word3_muladd(&w2, &w1, &w0, ws[14], p[7]); + word3_muladd(&w2, &w1, &w0, ws[15], p[6]); + word3_muladd(&w2, &w1, &w0, ws[16], p[5]); + word3_muladd(&w2, &w1, &w0, ws[17], p[4]); + word3_muladd(&w2, &w1, &w0, ws[18], p[3]); + word3_muladd(&w2, &w1, &w0, ws[19], p[2]); + word3_muladd(&w2, &w1, &w0, ws[20], p[1]); + word3_add(&w2, &w1, &w0, z[21]); + ws[21] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[21], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[22]); + word3_muladd(&w2, &w1, &w0, ws[1], p[21]); + word3_muladd(&w2, &w1, &w0, ws[2], p[20]); + word3_muladd(&w2, &w1, &w0, ws[3], p[19]); + word3_muladd(&w2, &w1, &w0, ws[4], p[18]); + word3_muladd(&w2, &w1, &w0, ws[5], p[17]); + word3_muladd(&w2, &w1, &w0, ws[6], p[16]); + word3_muladd(&w2, &w1, &w0, ws[7], p[15]); + word3_muladd(&w2, &w1, &w0, ws[8], p[14]); + word3_muladd(&w2, &w1, &w0, ws[9], p[13]); + word3_muladd(&w2, &w1, &w0, ws[10], p[12]); + word3_muladd(&w2, &w1, &w0, ws[11], p[11]); + word3_muladd(&w2, &w1, &w0, ws[12], p[10]); + word3_muladd(&w2, &w1, &w0, ws[13], p[9]); + word3_muladd(&w2, &w1, &w0, ws[14], p[8]); + word3_muladd(&w2, &w1, &w0, ws[15], p[7]); + word3_muladd(&w2, &w1, &w0, ws[16], p[6]); + word3_muladd(&w2, &w1, &w0, ws[17], p[5]); + word3_muladd(&w2, &w1, &w0, ws[18], p[4]); + word3_muladd(&w2, &w1, &w0, ws[19], p[3]); + word3_muladd(&w2, &w1, &w0, ws[20], p[2]); + word3_muladd(&w2, &w1, &w0, ws[21], p[1]); + word3_add(&w2, &w1, &w0, z[22]); + ws[22] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[22], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[23]); + word3_muladd(&w2, &w1, &w0, ws[1], p[22]); + word3_muladd(&w2, &w1, &w0, ws[2], p[21]); + word3_muladd(&w2, &w1, &w0, ws[3], p[20]); + word3_muladd(&w2, &w1, &w0, ws[4], p[19]); + word3_muladd(&w2, &w1, &w0, ws[5], p[18]); + word3_muladd(&w2, &w1, &w0, ws[6], p[17]); + word3_muladd(&w2, &w1, &w0, ws[7], p[16]); + word3_muladd(&w2, &w1, &w0, ws[8], p[15]); + word3_muladd(&w2, &w1, &w0, ws[9], p[14]); + word3_muladd(&w2, &w1, &w0, ws[10], p[13]); + word3_muladd(&w2, &w1, &w0, ws[11], p[12]); + word3_muladd(&w2, &w1, &w0, ws[12], p[11]); + word3_muladd(&w2, &w1, &w0, ws[13], p[10]); + word3_muladd(&w2, &w1, &w0, ws[14], p[9]); + word3_muladd(&w2, &w1, &w0, ws[15], p[8]); + word3_muladd(&w2, &w1, &w0, ws[16], p[7]); + word3_muladd(&w2, &w1, &w0, ws[17], p[6]); + word3_muladd(&w2, &w1, &w0, ws[18], p[5]); + word3_muladd(&w2, &w1, &w0, ws[19], p[4]); + word3_muladd(&w2, &w1, &w0, ws[20], p[3]); + word3_muladd(&w2, &w1, &w0, ws[21], p[2]); + word3_muladd(&w2, &w1, &w0, ws[22], p[1]); + word3_add(&w2, &w1, &w0, z[23]); + ws[23] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[23], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[24]); + word3_muladd(&w2, &w1, &w0, ws[1], p[23]); + word3_muladd(&w2, &w1, &w0, ws[2], p[22]); + word3_muladd(&w2, &w1, &w0, ws[3], p[21]); + word3_muladd(&w2, &w1, &w0, ws[4], p[20]); + word3_muladd(&w2, &w1, &w0, ws[5], p[19]); + word3_muladd(&w2, &w1, &w0, ws[6], p[18]); + word3_muladd(&w2, &w1, &w0, ws[7], p[17]); + word3_muladd(&w2, &w1, &w0, ws[8], p[16]); + word3_muladd(&w2, &w1, &w0, ws[9], p[15]); + word3_muladd(&w2, &w1, &w0, ws[10], p[14]); + word3_muladd(&w2, &w1, &w0, ws[11], p[13]); + word3_muladd(&w2, &w1, &w0, ws[12], p[12]); + word3_muladd(&w2, &w1, &w0, ws[13], p[11]); + word3_muladd(&w2, &w1, &w0, ws[14], p[10]); + word3_muladd(&w2, &w1, &w0, ws[15], p[9]); + word3_muladd(&w2, &w1, &w0, ws[16], p[8]); + word3_muladd(&w2, &w1, &w0, ws[17], p[7]); + word3_muladd(&w2, &w1, &w0, ws[18], p[6]); + word3_muladd(&w2, &w1, &w0, ws[19], p[5]); + word3_muladd(&w2, &w1, &w0, ws[20], p[4]); + word3_muladd(&w2, &w1, &w0, ws[21], p[3]); + word3_muladd(&w2, &w1, &w0, ws[22], p[2]); + word3_muladd(&w2, &w1, &w0, ws[23], p[1]); + word3_add(&w2, &w1, &w0, z[24]); + ws[24] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[24], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[25]); + word3_muladd(&w2, &w1, &w0, ws[1], p[24]); + word3_muladd(&w2, &w1, &w0, ws[2], p[23]); + word3_muladd(&w2, &w1, &w0, ws[3], p[22]); + word3_muladd(&w2, &w1, &w0, ws[4], p[21]); + word3_muladd(&w2, &w1, &w0, ws[5], p[20]); + word3_muladd(&w2, &w1, &w0, ws[6], p[19]); + word3_muladd(&w2, &w1, &w0, ws[7], p[18]); + word3_muladd(&w2, &w1, &w0, ws[8], p[17]); + word3_muladd(&w2, &w1, &w0, ws[9], p[16]); + word3_muladd(&w2, &w1, &w0, ws[10], p[15]); + word3_muladd(&w2, &w1, &w0, ws[11], p[14]); + word3_muladd(&w2, &w1, &w0, ws[12], p[13]); + word3_muladd(&w2, &w1, &w0, ws[13], p[12]); + word3_muladd(&w2, &w1, &w0, ws[14], p[11]); + word3_muladd(&w2, &w1, &w0, ws[15], p[10]); + word3_muladd(&w2, &w1, &w0, ws[16], p[9]); + word3_muladd(&w2, &w1, &w0, ws[17], p[8]); + word3_muladd(&w2, &w1, &w0, ws[18], p[7]); + word3_muladd(&w2, &w1, &w0, ws[19], p[6]); + word3_muladd(&w2, &w1, &w0, ws[20], p[5]); + word3_muladd(&w2, &w1, &w0, ws[21], p[4]); + word3_muladd(&w2, &w1, &w0, ws[22], p[3]); + word3_muladd(&w2, &w1, &w0, ws[23], p[2]); + word3_muladd(&w2, &w1, &w0, ws[24], p[1]); + word3_add(&w2, &w1, &w0, z[25]); + ws[25] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[25], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[26]); + word3_muladd(&w2, &w1, &w0, ws[1], p[25]); + word3_muladd(&w2, &w1, &w0, ws[2], p[24]); + word3_muladd(&w2, &w1, &w0, ws[3], p[23]); + word3_muladd(&w2, &w1, &w0, ws[4], p[22]); + word3_muladd(&w2, &w1, &w0, ws[5], p[21]); + word3_muladd(&w2, &w1, &w0, ws[6], p[20]); + word3_muladd(&w2, &w1, &w0, ws[7], p[19]); + word3_muladd(&w2, &w1, &w0, ws[8], p[18]); + word3_muladd(&w2, &w1, &w0, ws[9], p[17]); + word3_muladd(&w2, &w1, &w0, ws[10], p[16]); + word3_muladd(&w2, &w1, &w0, ws[11], p[15]); + word3_muladd(&w2, &w1, &w0, ws[12], p[14]); + word3_muladd(&w2, &w1, &w0, ws[13], p[13]); + word3_muladd(&w2, &w1, &w0, ws[14], p[12]); + word3_muladd(&w2, &w1, &w0, ws[15], p[11]); + word3_muladd(&w2, &w1, &w0, ws[16], p[10]); + word3_muladd(&w2, &w1, &w0, ws[17], p[9]); + word3_muladd(&w2, &w1, &w0, ws[18], p[8]); + word3_muladd(&w2, &w1, &w0, ws[19], p[7]); + word3_muladd(&w2, &w1, &w0, ws[20], p[6]); + word3_muladd(&w2, &w1, &w0, ws[21], p[5]); + word3_muladd(&w2, &w1, &w0, ws[22], p[4]); + word3_muladd(&w2, &w1, &w0, ws[23], p[3]); + word3_muladd(&w2, &w1, &w0, ws[24], p[2]); + word3_muladd(&w2, &w1, &w0, ws[25], p[1]); + word3_add(&w2, &w1, &w0, z[26]); + ws[26] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[26], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[27]); + word3_muladd(&w2, &w1, &w0, ws[1], p[26]); + word3_muladd(&w2, &w1, &w0, ws[2], p[25]); + word3_muladd(&w2, &w1, &w0, ws[3], p[24]); + word3_muladd(&w2, &w1, &w0, ws[4], p[23]); + word3_muladd(&w2, &w1, &w0, ws[5], p[22]); + word3_muladd(&w2, &w1, &w0, ws[6], p[21]); + word3_muladd(&w2, &w1, &w0, ws[7], p[20]); + word3_muladd(&w2, &w1, &w0, ws[8], p[19]); + word3_muladd(&w2, &w1, &w0, ws[9], p[18]); + word3_muladd(&w2, &w1, &w0, ws[10], p[17]); + word3_muladd(&w2, &w1, &w0, ws[11], p[16]); + word3_muladd(&w2, &w1, &w0, ws[12], p[15]); + word3_muladd(&w2, &w1, &w0, ws[13], p[14]); + word3_muladd(&w2, &w1, &w0, ws[14], p[13]); + word3_muladd(&w2, &w1, &w0, ws[15], p[12]); + word3_muladd(&w2, &w1, &w0, ws[16], p[11]); + word3_muladd(&w2, &w1, &w0, ws[17], p[10]); + word3_muladd(&w2, &w1, &w0, ws[18], p[9]); + word3_muladd(&w2, &w1, &w0, ws[19], p[8]); + word3_muladd(&w2, &w1, &w0, ws[20], p[7]); + word3_muladd(&w2, &w1, &w0, ws[21], p[6]); + word3_muladd(&w2, &w1, &w0, ws[22], p[5]); + word3_muladd(&w2, &w1, &w0, ws[23], p[4]); + word3_muladd(&w2, &w1, &w0, ws[24], p[3]); + word3_muladd(&w2, &w1, &w0, ws[25], p[2]); + word3_muladd(&w2, &w1, &w0, ws[26], p[1]); + word3_add(&w2, &w1, &w0, z[27]); + ws[27] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[27], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[28]); + word3_muladd(&w2, &w1, &w0, ws[1], p[27]); + word3_muladd(&w2, &w1, &w0, ws[2], p[26]); + word3_muladd(&w2, &w1, &w0, ws[3], p[25]); + word3_muladd(&w2, &w1, &w0, ws[4], p[24]); + word3_muladd(&w2, &w1, &w0, ws[5], p[23]); + word3_muladd(&w2, &w1, &w0, ws[6], p[22]); + word3_muladd(&w2, &w1, &w0, ws[7], p[21]); + word3_muladd(&w2, &w1, &w0, ws[8], p[20]); + word3_muladd(&w2, &w1, &w0, ws[9], p[19]); + word3_muladd(&w2, &w1, &w0, ws[10], p[18]); + word3_muladd(&w2, &w1, &w0, ws[11], p[17]); + word3_muladd(&w2, &w1, &w0, ws[12], p[16]); + word3_muladd(&w2, &w1, &w0, ws[13], p[15]); + word3_muladd(&w2, &w1, &w0, ws[14], p[14]); + word3_muladd(&w2, &w1, &w0, ws[15], p[13]); + word3_muladd(&w2, &w1, &w0, ws[16], p[12]); + word3_muladd(&w2, &w1, &w0, ws[17], p[11]); + word3_muladd(&w2, &w1, &w0, ws[18], p[10]); + word3_muladd(&w2, &w1, &w0, ws[19], p[9]); + word3_muladd(&w2, &w1, &w0, ws[20], p[8]); + word3_muladd(&w2, &w1, &w0, ws[21], p[7]); + word3_muladd(&w2, &w1, &w0, ws[22], p[6]); + word3_muladd(&w2, &w1, &w0, ws[23], p[5]); + word3_muladd(&w2, &w1, &w0, ws[24], p[4]); + word3_muladd(&w2, &w1, &w0, ws[25], p[3]); + word3_muladd(&w2, &w1, &w0, ws[26], p[2]); + word3_muladd(&w2, &w1, &w0, ws[27], p[1]); + word3_add(&w2, &w1, &w0, z[28]); + ws[28] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[28], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[29]); + word3_muladd(&w2, &w1, &w0, ws[1], p[28]); + word3_muladd(&w2, &w1, &w0, ws[2], p[27]); + word3_muladd(&w2, &w1, &w0, ws[3], p[26]); + word3_muladd(&w2, &w1, &w0, ws[4], p[25]); + word3_muladd(&w2, &w1, &w0, ws[5], p[24]); + word3_muladd(&w2, &w1, &w0, ws[6], p[23]); + word3_muladd(&w2, &w1, &w0, ws[7], p[22]); + word3_muladd(&w2, &w1, &w0, ws[8], p[21]); + word3_muladd(&w2, &w1, &w0, ws[9], p[20]); + word3_muladd(&w2, &w1, &w0, ws[10], p[19]); + word3_muladd(&w2, &w1, &w0, ws[11], p[18]); + word3_muladd(&w2, &w1, &w0, ws[12], p[17]); + word3_muladd(&w2, &w1, &w0, ws[13], p[16]); + word3_muladd(&w2, &w1, &w0, ws[14], p[15]); + word3_muladd(&w2, &w1, &w0, ws[15], p[14]); + word3_muladd(&w2, &w1, &w0, ws[16], p[13]); + word3_muladd(&w2, &w1, &w0, ws[17], p[12]); + word3_muladd(&w2, &w1, &w0, ws[18], p[11]); + word3_muladd(&w2, &w1, &w0, ws[19], p[10]); + word3_muladd(&w2, &w1, &w0, ws[20], p[9]); + word3_muladd(&w2, &w1, &w0, ws[21], p[8]); + word3_muladd(&w2, &w1, &w0, ws[22], p[7]); + word3_muladd(&w2, &w1, &w0, ws[23], p[6]); + word3_muladd(&w2, &w1, &w0, ws[24], p[5]); + word3_muladd(&w2, &w1, &w0, ws[25], p[4]); + word3_muladd(&w2, &w1, &w0, ws[26], p[3]); + word3_muladd(&w2, &w1, &w0, ws[27], p[2]); + word3_muladd(&w2, &w1, &w0, ws[28], p[1]); + word3_add(&w2, &w1, &w0, z[29]); + ws[29] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[29], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[30]); + word3_muladd(&w2, &w1, &w0, ws[1], p[29]); + word3_muladd(&w2, &w1, &w0, ws[2], p[28]); + word3_muladd(&w2, &w1, &w0, ws[3], p[27]); + word3_muladd(&w2, &w1, &w0, ws[4], p[26]); + word3_muladd(&w2, &w1, &w0, ws[5], p[25]); + word3_muladd(&w2, &w1, &w0, ws[6], p[24]); + word3_muladd(&w2, &w1, &w0, ws[7], p[23]); + word3_muladd(&w2, &w1, &w0, ws[8], p[22]); + word3_muladd(&w2, &w1, &w0, ws[9], p[21]); + word3_muladd(&w2, &w1, &w0, ws[10], p[20]); + word3_muladd(&w2, &w1, &w0, ws[11], p[19]); + word3_muladd(&w2, &w1, &w0, ws[12], p[18]); + word3_muladd(&w2, &w1, &w0, ws[13], p[17]); + word3_muladd(&w2, &w1, &w0, ws[14], p[16]); + word3_muladd(&w2, &w1, &w0, ws[15], p[15]); + word3_muladd(&w2, &w1, &w0, ws[16], p[14]); + word3_muladd(&w2, &w1, &w0, ws[17], p[13]); + word3_muladd(&w2, &w1, &w0, ws[18], p[12]); + word3_muladd(&w2, &w1, &w0, ws[19], p[11]); + word3_muladd(&w2, &w1, &w0, ws[20], p[10]); + word3_muladd(&w2, &w1, &w0, ws[21], p[9]); + word3_muladd(&w2, &w1, &w0, ws[22], p[8]); + word3_muladd(&w2, &w1, &w0, ws[23], p[7]); + word3_muladd(&w2, &w1, &w0, ws[24], p[6]); + word3_muladd(&w2, &w1, &w0, ws[25], p[5]); + word3_muladd(&w2, &w1, &w0, ws[26], p[4]); + word3_muladd(&w2, &w1, &w0, ws[27], p[3]); + word3_muladd(&w2, &w1, &w0, ws[28], p[2]); + word3_muladd(&w2, &w1, &w0, ws[29], p[1]); + word3_add(&w2, &w1, &w0, z[30]); + ws[30] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[30], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[0], p[31]); + word3_muladd(&w2, &w1, &w0, ws[1], p[30]); + word3_muladd(&w2, &w1, &w0, ws[2], p[29]); + word3_muladd(&w2, &w1, &w0, ws[3], p[28]); + word3_muladd(&w2, &w1, &w0, ws[4], p[27]); + word3_muladd(&w2, &w1, &w0, ws[5], p[26]); + word3_muladd(&w2, &w1, &w0, ws[6], p[25]); + word3_muladd(&w2, &w1, &w0, ws[7], p[24]); + word3_muladd(&w2, &w1, &w0, ws[8], p[23]); + word3_muladd(&w2, &w1, &w0, ws[9], p[22]); + word3_muladd(&w2, &w1, &w0, ws[10], p[21]); + word3_muladd(&w2, &w1, &w0, ws[11], p[20]); + word3_muladd(&w2, &w1, &w0, ws[12], p[19]); + word3_muladd(&w2, &w1, &w0, ws[13], p[18]); + word3_muladd(&w2, &w1, &w0, ws[14], p[17]); + word3_muladd(&w2, &w1, &w0, ws[15], p[16]); + word3_muladd(&w2, &w1, &w0, ws[16], p[15]); + word3_muladd(&w2, &w1, &w0, ws[17], p[14]); + word3_muladd(&w2, &w1, &w0, ws[18], p[13]); + word3_muladd(&w2, &w1, &w0, ws[19], p[12]); + word3_muladd(&w2, &w1, &w0, ws[20], p[11]); + word3_muladd(&w2, &w1, &w0, ws[21], p[10]); + word3_muladd(&w2, &w1, &w0, ws[22], p[9]); + word3_muladd(&w2, &w1, &w0, ws[23], p[8]); + word3_muladd(&w2, &w1, &w0, ws[24], p[7]); + word3_muladd(&w2, &w1, &w0, ws[25], p[6]); + word3_muladd(&w2, &w1, &w0, ws[26], p[5]); + word3_muladd(&w2, &w1, &w0, ws[27], p[4]); + word3_muladd(&w2, &w1, &w0, ws[28], p[3]); + word3_muladd(&w2, &w1, &w0, ws[29], p[2]); + word3_muladd(&w2, &w1, &w0, ws[30], p[1]); + word3_add(&w2, &w1, &w0, z[31]); + ws[31] = w0 * p_dash; + word3_muladd(&w2, &w1, &w0, ws[31], p[0]); + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[1], p[31]); + word3_muladd(&w2, &w1, &w0, ws[2], p[30]); + word3_muladd(&w2, &w1, &w0, ws[3], p[29]); + word3_muladd(&w2, &w1, &w0, ws[4], p[28]); + word3_muladd(&w2, &w1, &w0, ws[5], p[27]); + word3_muladd(&w2, &w1, &w0, ws[6], p[26]); + word3_muladd(&w2, &w1, &w0, ws[7], p[25]); + word3_muladd(&w2, &w1, &w0, ws[8], p[24]); + word3_muladd(&w2, &w1, &w0, ws[9], p[23]); + word3_muladd(&w2, &w1, &w0, ws[10], p[22]); + word3_muladd(&w2, &w1, &w0, ws[11], p[21]); + word3_muladd(&w2, &w1, &w0, ws[12], p[20]); + word3_muladd(&w2, &w1, &w0, ws[13], p[19]); + word3_muladd(&w2, &w1, &w0, ws[14], p[18]); + word3_muladd(&w2, &w1, &w0, ws[15], p[17]); + word3_muladd(&w2, &w1, &w0, ws[16], p[16]); + word3_muladd(&w2, &w1, &w0, ws[17], p[15]); + word3_muladd(&w2, &w1, &w0, ws[18], p[14]); + word3_muladd(&w2, &w1, &w0, ws[19], p[13]); + word3_muladd(&w2, &w1, &w0, ws[20], p[12]); + word3_muladd(&w2, &w1, &w0, ws[21], p[11]); + word3_muladd(&w2, &w1, &w0, ws[22], p[10]); + word3_muladd(&w2, &w1, &w0, ws[23], p[9]); + word3_muladd(&w2, &w1, &w0, ws[24], p[8]); + word3_muladd(&w2, &w1, &w0, ws[25], p[7]); + word3_muladd(&w2, &w1, &w0, ws[26], p[6]); + word3_muladd(&w2, &w1, &w0, ws[27], p[5]); + word3_muladd(&w2, &w1, &w0, ws[28], p[4]); + word3_muladd(&w2, &w1, &w0, ws[29], p[3]); + word3_muladd(&w2, &w1, &w0, ws[30], p[2]); + word3_muladd(&w2, &w1, &w0, ws[31], p[1]); + word3_add(&w2, &w1, &w0, z[32]); + ws[0] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[2], p[31]); + word3_muladd(&w2, &w1, &w0, ws[3], p[30]); + word3_muladd(&w2, &w1, &w0, ws[4], p[29]); + word3_muladd(&w2, &w1, &w0, ws[5], p[28]); + word3_muladd(&w2, &w1, &w0, ws[6], p[27]); + word3_muladd(&w2, &w1, &w0, ws[7], p[26]); + word3_muladd(&w2, &w1, &w0, ws[8], p[25]); + word3_muladd(&w2, &w1, &w0, ws[9], p[24]); + word3_muladd(&w2, &w1, &w0, ws[10], p[23]); + word3_muladd(&w2, &w1, &w0, ws[11], p[22]); + word3_muladd(&w2, &w1, &w0, ws[12], p[21]); + word3_muladd(&w2, &w1, &w0, ws[13], p[20]); + word3_muladd(&w2, &w1, &w0, ws[14], p[19]); + word3_muladd(&w2, &w1, &w0, ws[15], p[18]); + word3_muladd(&w2, &w1, &w0, ws[16], p[17]); + word3_muladd(&w2, &w1, &w0, ws[17], p[16]); + word3_muladd(&w2, &w1, &w0, ws[18], p[15]); + word3_muladd(&w2, &w1, &w0, ws[19], p[14]); + word3_muladd(&w2, &w1, &w0, ws[20], p[13]); + word3_muladd(&w2, &w1, &w0, ws[21], p[12]); + word3_muladd(&w2, &w1, &w0, ws[22], p[11]); + word3_muladd(&w2, &w1, &w0, ws[23], p[10]); + word3_muladd(&w2, &w1, &w0, ws[24], p[9]); + word3_muladd(&w2, &w1, &w0, ws[25], p[8]); + word3_muladd(&w2, &w1, &w0, ws[26], p[7]); + word3_muladd(&w2, &w1, &w0, ws[27], p[6]); + word3_muladd(&w2, &w1, &w0, ws[28], p[5]); + word3_muladd(&w2, &w1, &w0, ws[29], p[4]); + word3_muladd(&w2, &w1, &w0, ws[30], p[3]); + word3_muladd(&w2, &w1, &w0, ws[31], p[2]); + word3_add(&w2, &w1, &w0, z[33]); + ws[1] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[3], p[31]); + word3_muladd(&w2, &w1, &w0, ws[4], p[30]); + word3_muladd(&w2, &w1, &w0, ws[5], p[29]); + word3_muladd(&w2, &w1, &w0, ws[6], p[28]); + word3_muladd(&w2, &w1, &w0, ws[7], p[27]); + word3_muladd(&w2, &w1, &w0, ws[8], p[26]); + word3_muladd(&w2, &w1, &w0, ws[9], p[25]); + word3_muladd(&w2, &w1, &w0, ws[10], p[24]); + word3_muladd(&w2, &w1, &w0, ws[11], p[23]); + word3_muladd(&w2, &w1, &w0, ws[12], p[22]); + word3_muladd(&w2, &w1, &w0, ws[13], p[21]); + word3_muladd(&w2, &w1, &w0, ws[14], p[20]); + word3_muladd(&w2, &w1, &w0, ws[15], p[19]); + word3_muladd(&w2, &w1, &w0, ws[16], p[18]); + word3_muladd(&w2, &w1, &w0, ws[17], p[17]); + word3_muladd(&w2, &w1, &w0, ws[18], p[16]); + word3_muladd(&w2, &w1, &w0, ws[19], p[15]); + word3_muladd(&w2, &w1, &w0, ws[20], p[14]); + word3_muladd(&w2, &w1, &w0, ws[21], p[13]); + word3_muladd(&w2, &w1, &w0, ws[22], p[12]); + word3_muladd(&w2, &w1, &w0, ws[23], p[11]); + word3_muladd(&w2, &w1, &w0, ws[24], p[10]); + word3_muladd(&w2, &w1, &w0, ws[25], p[9]); + word3_muladd(&w2, &w1, &w0, ws[26], p[8]); + word3_muladd(&w2, &w1, &w0, ws[27], p[7]); + word3_muladd(&w2, &w1, &w0, ws[28], p[6]); + word3_muladd(&w2, &w1, &w0, ws[29], p[5]); + word3_muladd(&w2, &w1, &w0, ws[30], p[4]); + word3_muladd(&w2, &w1, &w0, ws[31], p[3]); + word3_add(&w2, &w1, &w0, z[34]); + ws[2] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[4], p[31]); + word3_muladd(&w2, &w1, &w0, ws[5], p[30]); + word3_muladd(&w2, &w1, &w0, ws[6], p[29]); + word3_muladd(&w2, &w1, &w0, ws[7], p[28]); + word3_muladd(&w2, &w1, &w0, ws[8], p[27]); + word3_muladd(&w2, &w1, &w0, ws[9], p[26]); + word3_muladd(&w2, &w1, &w0, ws[10], p[25]); + word3_muladd(&w2, &w1, &w0, ws[11], p[24]); + word3_muladd(&w2, &w1, &w0, ws[12], p[23]); + word3_muladd(&w2, &w1, &w0, ws[13], p[22]); + word3_muladd(&w2, &w1, &w0, ws[14], p[21]); + word3_muladd(&w2, &w1, &w0, ws[15], p[20]); + word3_muladd(&w2, &w1, &w0, ws[16], p[19]); + word3_muladd(&w2, &w1, &w0, ws[17], p[18]); + word3_muladd(&w2, &w1, &w0, ws[18], p[17]); + word3_muladd(&w2, &w1, &w0, ws[19], p[16]); + word3_muladd(&w2, &w1, &w0, ws[20], p[15]); + word3_muladd(&w2, &w1, &w0, ws[21], p[14]); + word3_muladd(&w2, &w1, &w0, ws[22], p[13]); + word3_muladd(&w2, &w1, &w0, ws[23], p[12]); + word3_muladd(&w2, &w1, &w0, ws[24], p[11]); + word3_muladd(&w2, &w1, &w0, ws[25], p[10]); + word3_muladd(&w2, &w1, &w0, ws[26], p[9]); + word3_muladd(&w2, &w1, &w0, ws[27], p[8]); + word3_muladd(&w2, &w1, &w0, ws[28], p[7]); + word3_muladd(&w2, &w1, &w0, ws[29], p[6]); + word3_muladd(&w2, &w1, &w0, ws[30], p[5]); + word3_muladd(&w2, &w1, &w0, ws[31], p[4]); + word3_add(&w2, &w1, &w0, z[35]); + ws[3] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[5], p[31]); + word3_muladd(&w2, &w1, &w0, ws[6], p[30]); + word3_muladd(&w2, &w1, &w0, ws[7], p[29]); + word3_muladd(&w2, &w1, &w0, ws[8], p[28]); + word3_muladd(&w2, &w1, &w0, ws[9], p[27]); + word3_muladd(&w2, &w1, &w0, ws[10], p[26]); + word3_muladd(&w2, &w1, &w0, ws[11], p[25]); + word3_muladd(&w2, &w1, &w0, ws[12], p[24]); + word3_muladd(&w2, &w1, &w0, ws[13], p[23]); + word3_muladd(&w2, &w1, &w0, ws[14], p[22]); + word3_muladd(&w2, &w1, &w0, ws[15], p[21]); + word3_muladd(&w2, &w1, &w0, ws[16], p[20]); + word3_muladd(&w2, &w1, &w0, ws[17], p[19]); + word3_muladd(&w2, &w1, &w0, ws[18], p[18]); + word3_muladd(&w2, &w1, &w0, ws[19], p[17]); + word3_muladd(&w2, &w1, &w0, ws[20], p[16]); + word3_muladd(&w2, &w1, &w0, ws[21], p[15]); + word3_muladd(&w2, &w1, &w0, ws[22], p[14]); + word3_muladd(&w2, &w1, &w0, ws[23], p[13]); + word3_muladd(&w2, &w1, &w0, ws[24], p[12]); + word3_muladd(&w2, &w1, &w0, ws[25], p[11]); + word3_muladd(&w2, &w1, &w0, ws[26], p[10]); + word3_muladd(&w2, &w1, &w0, ws[27], p[9]); + word3_muladd(&w2, &w1, &w0, ws[28], p[8]); + word3_muladd(&w2, &w1, &w0, ws[29], p[7]); + word3_muladd(&w2, &w1, &w0, ws[30], p[6]); + word3_muladd(&w2, &w1, &w0, ws[31], p[5]); + word3_add(&w2, &w1, &w0, z[36]); + ws[4] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[6], p[31]); + word3_muladd(&w2, &w1, &w0, ws[7], p[30]); + word3_muladd(&w2, &w1, &w0, ws[8], p[29]); + word3_muladd(&w2, &w1, &w0, ws[9], p[28]); + word3_muladd(&w2, &w1, &w0, ws[10], p[27]); + word3_muladd(&w2, &w1, &w0, ws[11], p[26]); + word3_muladd(&w2, &w1, &w0, ws[12], p[25]); + word3_muladd(&w2, &w1, &w0, ws[13], p[24]); + word3_muladd(&w2, &w1, &w0, ws[14], p[23]); + word3_muladd(&w2, &w1, &w0, ws[15], p[22]); + word3_muladd(&w2, &w1, &w0, ws[16], p[21]); + word3_muladd(&w2, &w1, &w0, ws[17], p[20]); + word3_muladd(&w2, &w1, &w0, ws[18], p[19]); + word3_muladd(&w2, &w1, &w0, ws[19], p[18]); + word3_muladd(&w2, &w1, &w0, ws[20], p[17]); + word3_muladd(&w2, &w1, &w0, ws[21], p[16]); + word3_muladd(&w2, &w1, &w0, ws[22], p[15]); + word3_muladd(&w2, &w1, &w0, ws[23], p[14]); + word3_muladd(&w2, &w1, &w0, ws[24], p[13]); + word3_muladd(&w2, &w1, &w0, ws[25], p[12]); + word3_muladd(&w2, &w1, &w0, ws[26], p[11]); + word3_muladd(&w2, &w1, &w0, ws[27], p[10]); + word3_muladd(&w2, &w1, &w0, ws[28], p[9]); + word3_muladd(&w2, &w1, &w0, ws[29], p[8]); + word3_muladd(&w2, &w1, &w0, ws[30], p[7]); + word3_muladd(&w2, &w1, &w0, ws[31], p[6]); + word3_add(&w2, &w1, &w0, z[37]); + ws[5] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[7], p[31]); + word3_muladd(&w2, &w1, &w0, ws[8], p[30]); + word3_muladd(&w2, &w1, &w0, ws[9], p[29]); + word3_muladd(&w2, &w1, &w0, ws[10], p[28]); + word3_muladd(&w2, &w1, &w0, ws[11], p[27]); + word3_muladd(&w2, &w1, &w0, ws[12], p[26]); + word3_muladd(&w2, &w1, &w0, ws[13], p[25]); + word3_muladd(&w2, &w1, &w0, ws[14], p[24]); + word3_muladd(&w2, &w1, &w0, ws[15], p[23]); + word3_muladd(&w2, &w1, &w0, ws[16], p[22]); + word3_muladd(&w2, &w1, &w0, ws[17], p[21]); + word3_muladd(&w2, &w1, &w0, ws[18], p[20]); + word3_muladd(&w2, &w1, &w0, ws[19], p[19]); + word3_muladd(&w2, &w1, &w0, ws[20], p[18]); + word3_muladd(&w2, &w1, &w0, ws[21], p[17]); + word3_muladd(&w2, &w1, &w0, ws[22], p[16]); + word3_muladd(&w2, &w1, &w0, ws[23], p[15]); + word3_muladd(&w2, &w1, &w0, ws[24], p[14]); + word3_muladd(&w2, &w1, &w0, ws[25], p[13]); + word3_muladd(&w2, &w1, &w0, ws[26], p[12]); + word3_muladd(&w2, &w1, &w0, ws[27], p[11]); + word3_muladd(&w2, &w1, &w0, ws[28], p[10]); + word3_muladd(&w2, &w1, &w0, ws[29], p[9]); + word3_muladd(&w2, &w1, &w0, ws[30], p[8]); + word3_muladd(&w2, &w1, &w0, ws[31], p[7]); + word3_add(&w2, &w1, &w0, z[38]); + ws[6] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[8], p[31]); + word3_muladd(&w2, &w1, &w0, ws[9], p[30]); + word3_muladd(&w2, &w1, &w0, ws[10], p[29]); + word3_muladd(&w2, &w1, &w0, ws[11], p[28]); + word3_muladd(&w2, &w1, &w0, ws[12], p[27]); + word3_muladd(&w2, &w1, &w0, ws[13], p[26]); + word3_muladd(&w2, &w1, &w0, ws[14], p[25]); + word3_muladd(&w2, &w1, &w0, ws[15], p[24]); + word3_muladd(&w2, &w1, &w0, ws[16], p[23]); + word3_muladd(&w2, &w1, &w0, ws[17], p[22]); + word3_muladd(&w2, &w1, &w0, ws[18], p[21]); + word3_muladd(&w2, &w1, &w0, ws[19], p[20]); + word3_muladd(&w2, &w1, &w0, ws[20], p[19]); + word3_muladd(&w2, &w1, &w0, ws[21], p[18]); + word3_muladd(&w2, &w1, &w0, ws[22], p[17]); + word3_muladd(&w2, &w1, &w0, ws[23], p[16]); + word3_muladd(&w2, &w1, &w0, ws[24], p[15]); + word3_muladd(&w2, &w1, &w0, ws[25], p[14]); + word3_muladd(&w2, &w1, &w0, ws[26], p[13]); + word3_muladd(&w2, &w1, &w0, ws[27], p[12]); + word3_muladd(&w2, &w1, &w0, ws[28], p[11]); + word3_muladd(&w2, &w1, &w0, ws[29], p[10]); + word3_muladd(&w2, &w1, &w0, ws[30], p[9]); + word3_muladd(&w2, &w1, &w0, ws[31], p[8]); + word3_add(&w2, &w1, &w0, z[39]); + ws[7] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[9], p[31]); + word3_muladd(&w2, &w1, &w0, ws[10], p[30]); + word3_muladd(&w2, &w1, &w0, ws[11], p[29]); + word3_muladd(&w2, &w1, &w0, ws[12], p[28]); + word3_muladd(&w2, &w1, &w0, ws[13], p[27]); + word3_muladd(&w2, &w1, &w0, ws[14], p[26]); + word3_muladd(&w2, &w1, &w0, ws[15], p[25]); + word3_muladd(&w2, &w1, &w0, ws[16], p[24]); + word3_muladd(&w2, &w1, &w0, ws[17], p[23]); + word3_muladd(&w2, &w1, &w0, ws[18], p[22]); + word3_muladd(&w2, &w1, &w0, ws[19], p[21]); + word3_muladd(&w2, &w1, &w0, ws[20], p[20]); + word3_muladd(&w2, &w1, &w0, ws[21], p[19]); + word3_muladd(&w2, &w1, &w0, ws[22], p[18]); + word3_muladd(&w2, &w1, &w0, ws[23], p[17]); + word3_muladd(&w2, &w1, &w0, ws[24], p[16]); + word3_muladd(&w2, &w1, &w0, ws[25], p[15]); + word3_muladd(&w2, &w1, &w0, ws[26], p[14]); + word3_muladd(&w2, &w1, &w0, ws[27], p[13]); + word3_muladd(&w2, &w1, &w0, ws[28], p[12]); + word3_muladd(&w2, &w1, &w0, ws[29], p[11]); + word3_muladd(&w2, &w1, &w0, ws[30], p[10]); + word3_muladd(&w2, &w1, &w0, ws[31], p[9]); + word3_add(&w2, &w1, &w0, z[40]); + ws[8] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[10], p[31]); + word3_muladd(&w2, &w1, &w0, ws[11], p[30]); + word3_muladd(&w2, &w1, &w0, ws[12], p[29]); + word3_muladd(&w2, &w1, &w0, ws[13], p[28]); + word3_muladd(&w2, &w1, &w0, ws[14], p[27]); + word3_muladd(&w2, &w1, &w0, ws[15], p[26]); + word3_muladd(&w2, &w1, &w0, ws[16], p[25]); + word3_muladd(&w2, &w1, &w0, ws[17], p[24]); + word3_muladd(&w2, &w1, &w0, ws[18], p[23]); + word3_muladd(&w2, &w1, &w0, ws[19], p[22]); + word3_muladd(&w2, &w1, &w0, ws[20], p[21]); + word3_muladd(&w2, &w1, &w0, ws[21], p[20]); + word3_muladd(&w2, &w1, &w0, ws[22], p[19]); + word3_muladd(&w2, &w1, &w0, ws[23], p[18]); + word3_muladd(&w2, &w1, &w0, ws[24], p[17]); + word3_muladd(&w2, &w1, &w0, ws[25], p[16]); + word3_muladd(&w2, &w1, &w0, ws[26], p[15]); + word3_muladd(&w2, &w1, &w0, ws[27], p[14]); + word3_muladd(&w2, &w1, &w0, ws[28], p[13]); + word3_muladd(&w2, &w1, &w0, ws[29], p[12]); + word3_muladd(&w2, &w1, &w0, ws[30], p[11]); + word3_muladd(&w2, &w1, &w0, ws[31], p[10]); + word3_add(&w2, &w1, &w0, z[41]); + ws[9] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[11], p[31]); + word3_muladd(&w2, &w1, &w0, ws[12], p[30]); + word3_muladd(&w2, &w1, &w0, ws[13], p[29]); + word3_muladd(&w2, &w1, &w0, ws[14], p[28]); + word3_muladd(&w2, &w1, &w0, ws[15], p[27]); + word3_muladd(&w2, &w1, &w0, ws[16], p[26]); + word3_muladd(&w2, &w1, &w0, ws[17], p[25]); + word3_muladd(&w2, &w1, &w0, ws[18], p[24]); + word3_muladd(&w2, &w1, &w0, ws[19], p[23]); + word3_muladd(&w2, &w1, &w0, ws[20], p[22]); + word3_muladd(&w2, &w1, &w0, ws[21], p[21]); + word3_muladd(&w2, &w1, &w0, ws[22], p[20]); + word3_muladd(&w2, &w1, &w0, ws[23], p[19]); + word3_muladd(&w2, &w1, &w0, ws[24], p[18]); + word3_muladd(&w2, &w1, &w0, ws[25], p[17]); + word3_muladd(&w2, &w1, &w0, ws[26], p[16]); + word3_muladd(&w2, &w1, &w0, ws[27], p[15]); + word3_muladd(&w2, &w1, &w0, ws[28], p[14]); + word3_muladd(&w2, &w1, &w0, ws[29], p[13]); + word3_muladd(&w2, &w1, &w0, ws[30], p[12]); + word3_muladd(&w2, &w1, &w0, ws[31], p[11]); + word3_add(&w2, &w1, &w0, z[42]); + ws[10] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[12], p[31]); + word3_muladd(&w2, &w1, &w0, ws[13], p[30]); + word3_muladd(&w2, &w1, &w0, ws[14], p[29]); + word3_muladd(&w2, &w1, &w0, ws[15], p[28]); + word3_muladd(&w2, &w1, &w0, ws[16], p[27]); + word3_muladd(&w2, &w1, &w0, ws[17], p[26]); + word3_muladd(&w2, &w1, &w0, ws[18], p[25]); + word3_muladd(&w2, &w1, &w0, ws[19], p[24]); + word3_muladd(&w2, &w1, &w0, ws[20], p[23]); + word3_muladd(&w2, &w1, &w0, ws[21], p[22]); + word3_muladd(&w2, &w1, &w0, ws[22], p[21]); + word3_muladd(&w2, &w1, &w0, ws[23], p[20]); + word3_muladd(&w2, &w1, &w0, ws[24], p[19]); + word3_muladd(&w2, &w1, &w0, ws[25], p[18]); + word3_muladd(&w2, &w1, &w0, ws[26], p[17]); + word3_muladd(&w2, &w1, &w0, ws[27], p[16]); + word3_muladd(&w2, &w1, &w0, ws[28], p[15]); + word3_muladd(&w2, &w1, &w0, ws[29], p[14]); + word3_muladd(&w2, &w1, &w0, ws[30], p[13]); + word3_muladd(&w2, &w1, &w0, ws[31], p[12]); + word3_add(&w2, &w1, &w0, z[43]); + ws[11] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[13], p[31]); + word3_muladd(&w2, &w1, &w0, ws[14], p[30]); + word3_muladd(&w2, &w1, &w0, ws[15], p[29]); + word3_muladd(&w2, &w1, &w0, ws[16], p[28]); + word3_muladd(&w2, &w1, &w0, ws[17], p[27]); + word3_muladd(&w2, &w1, &w0, ws[18], p[26]); + word3_muladd(&w2, &w1, &w0, ws[19], p[25]); + word3_muladd(&w2, &w1, &w0, ws[20], p[24]); + word3_muladd(&w2, &w1, &w0, ws[21], p[23]); + word3_muladd(&w2, &w1, &w0, ws[22], p[22]); + word3_muladd(&w2, &w1, &w0, ws[23], p[21]); + word3_muladd(&w2, &w1, &w0, ws[24], p[20]); + word3_muladd(&w2, &w1, &w0, ws[25], p[19]); + word3_muladd(&w2, &w1, &w0, ws[26], p[18]); + word3_muladd(&w2, &w1, &w0, ws[27], p[17]); + word3_muladd(&w2, &w1, &w0, ws[28], p[16]); + word3_muladd(&w2, &w1, &w0, ws[29], p[15]); + word3_muladd(&w2, &w1, &w0, ws[30], p[14]); + word3_muladd(&w2, &w1, &w0, ws[31], p[13]); + word3_add(&w2, &w1, &w0, z[44]); + ws[12] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[14], p[31]); + word3_muladd(&w2, &w1, &w0, ws[15], p[30]); + word3_muladd(&w2, &w1, &w0, ws[16], p[29]); + word3_muladd(&w2, &w1, &w0, ws[17], p[28]); + word3_muladd(&w2, &w1, &w0, ws[18], p[27]); + word3_muladd(&w2, &w1, &w0, ws[19], p[26]); + word3_muladd(&w2, &w1, &w0, ws[20], p[25]); + word3_muladd(&w2, &w1, &w0, ws[21], p[24]); + word3_muladd(&w2, &w1, &w0, ws[22], p[23]); + word3_muladd(&w2, &w1, &w0, ws[23], p[22]); + word3_muladd(&w2, &w1, &w0, ws[24], p[21]); + word3_muladd(&w2, &w1, &w0, ws[25], p[20]); + word3_muladd(&w2, &w1, &w0, ws[26], p[19]); + word3_muladd(&w2, &w1, &w0, ws[27], p[18]); + word3_muladd(&w2, &w1, &w0, ws[28], p[17]); + word3_muladd(&w2, &w1, &w0, ws[29], p[16]); + word3_muladd(&w2, &w1, &w0, ws[30], p[15]); + word3_muladd(&w2, &w1, &w0, ws[31], p[14]); + word3_add(&w2, &w1, &w0, z[45]); + ws[13] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[15], p[31]); + word3_muladd(&w2, &w1, &w0, ws[16], p[30]); + word3_muladd(&w2, &w1, &w0, ws[17], p[29]); + word3_muladd(&w2, &w1, &w0, ws[18], p[28]); + word3_muladd(&w2, &w1, &w0, ws[19], p[27]); + word3_muladd(&w2, &w1, &w0, ws[20], p[26]); + word3_muladd(&w2, &w1, &w0, ws[21], p[25]); + word3_muladd(&w2, &w1, &w0, ws[22], p[24]); + word3_muladd(&w2, &w1, &w0, ws[23], p[23]); + word3_muladd(&w2, &w1, &w0, ws[24], p[22]); + word3_muladd(&w2, &w1, &w0, ws[25], p[21]); + word3_muladd(&w2, &w1, &w0, ws[26], p[20]); + word3_muladd(&w2, &w1, &w0, ws[27], p[19]); + word3_muladd(&w2, &w1, &w0, ws[28], p[18]); + word3_muladd(&w2, &w1, &w0, ws[29], p[17]); + word3_muladd(&w2, &w1, &w0, ws[30], p[16]); + word3_muladd(&w2, &w1, &w0, ws[31], p[15]); + word3_add(&w2, &w1, &w0, z[46]); + ws[14] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[16], p[31]); + word3_muladd(&w2, &w1, &w0, ws[17], p[30]); + word3_muladd(&w2, &w1, &w0, ws[18], p[29]); + word3_muladd(&w2, &w1, &w0, ws[19], p[28]); + word3_muladd(&w2, &w1, &w0, ws[20], p[27]); + word3_muladd(&w2, &w1, &w0, ws[21], p[26]); + word3_muladd(&w2, &w1, &w0, ws[22], p[25]); + word3_muladd(&w2, &w1, &w0, ws[23], p[24]); + word3_muladd(&w2, &w1, &w0, ws[24], p[23]); + word3_muladd(&w2, &w1, &w0, ws[25], p[22]); + word3_muladd(&w2, &w1, &w0, ws[26], p[21]); + word3_muladd(&w2, &w1, &w0, ws[27], p[20]); + word3_muladd(&w2, &w1, &w0, ws[28], p[19]); + word3_muladd(&w2, &w1, &w0, ws[29], p[18]); + word3_muladd(&w2, &w1, &w0, ws[30], p[17]); + word3_muladd(&w2, &w1, &w0, ws[31], p[16]); + word3_add(&w2, &w1, &w0, z[47]); + ws[15] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[17], p[31]); + word3_muladd(&w2, &w1, &w0, ws[18], p[30]); + word3_muladd(&w2, &w1, &w0, ws[19], p[29]); + word3_muladd(&w2, &w1, &w0, ws[20], p[28]); + word3_muladd(&w2, &w1, &w0, ws[21], p[27]); + word3_muladd(&w2, &w1, &w0, ws[22], p[26]); + word3_muladd(&w2, &w1, &w0, ws[23], p[25]); + word3_muladd(&w2, &w1, &w0, ws[24], p[24]); + word3_muladd(&w2, &w1, &w0, ws[25], p[23]); + word3_muladd(&w2, &w1, &w0, ws[26], p[22]); + word3_muladd(&w2, &w1, &w0, ws[27], p[21]); + word3_muladd(&w2, &w1, &w0, ws[28], p[20]); + word3_muladd(&w2, &w1, &w0, ws[29], p[19]); + word3_muladd(&w2, &w1, &w0, ws[30], p[18]); + word3_muladd(&w2, &w1, &w0, ws[31], p[17]); + word3_add(&w2, &w1, &w0, z[48]); + ws[16] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[18], p[31]); + word3_muladd(&w2, &w1, &w0, ws[19], p[30]); + word3_muladd(&w2, &w1, &w0, ws[20], p[29]); + word3_muladd(&w2, &w1, &w0, ws[21], p[28]); + word3_muladd(&w2, &w1, &w0, ws[22], p[27]); + word3_muladd(&w2, &w1, &w0, ws[23], p[26]); + word3_muladd(&w2, &w1, &w0, ws[24], p[25]); + word3_muladd(&w2, &w1, &w0, ws[25], p[24]); + word3_muladd(&w2, &w1, &w0, ws[26], p[23]); + word3_muladd(&w2, &w1, &w0, ws[27], p[22]); + word3_muladd(&w2, &w1, &w0, ws[28], p[21]); + word3_muladd(&w2, &w1, &w0, ws[29], p[20]); + word3_muladd(&w2, &w1, &w0, ws[30], p[19]); + word3_muladd(&w2, &w1, &w0, ws[31], p[18]); + word3_add(&w2, &w1, &w0, z[49]); + ws[17] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[19], p[31]); + word3_muladd(&w2, &w1, &w0, ws[20], p[30]); + word3_muladd(&w2, &w1, &w0, ws[21], p[29]); + word3_muladd(&w2, &w1, &w0, ws[22], p[28]); + word3_muladd(&w2, &w1, &w0, ws[23], p[27]); + word3_muladd(&w2, &w1, &w0, ws[24], p[26]); + word3_muladd(&w2, &w1, &w0, ws[25], p[25]); + word3_muladd(&w2, &w1, &w0, ws[26], p[24]); + word3_muladd(&w2, &w1, &w0, ws[27], p[23]); + word3_muladd(&w2, &w1, &w0, ws[28], p[22]); + word3_muladd(&w2, &w1, &w0, ws[29], p[21]); + word3_muladd(&w2, &w1, &w0, ws[30], p[20]); + word3_muladd(&w2, &w1, &w0, ws[31], p[19]); + word3_add(&w2, &w1, &w0, z[50]); + ws[18] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[20], p[31]); + word3_muladd(&w2, &w1, &w0, ws[21], p[30]); + word3_muladd(&w2, &w1, &w0, ws[22], p[29]); + word3_muladd(&w2, &w1, &w0, ws[23], p[28]); + word3_muladd(&w2, &w1, &w0, ws[24], p[27]); + word3_muladd(&w2, &w1, &w0, ws[25], p[26]); + word3_muladd(&w2, &w1, &w0, ws[26], p[25]); + word3_muladd(&w2, &w1, &w0, ws[27], p[24]); + word3_muladd(&w2, &w1, &w0, ws[28], p[23]); + word3_muladd(&w2, &w1, &w0, ws[29], p[22]); + word3_muladd(&w2, &w1, &w0, ws[30], p[21]); + word3_muladd(&w2, &w1, &w0, ws[31], p[20]); + word3_add(&w2, &w1, &w0, z[51]); + ws[19] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[21], p[31]); + word3_muladd(&w2, &w1, &w0, ws[22], p[30]); + word3_muladd(&w2, &w1, &w0, ws[23], p[29]); + word3_muladd(&w2, &w1, &w0, ws[24], p[28]); + word3_muladd(&w2, &w1, &w0, ws[25], p[27]); + word3_muladd(&w2, &w1, &w0, ws[26], p[26]); + word3_muladd(&w2, &w1, &w0, ws[27], p[25]); + word3_muladd(&w2, &w1, &w0, ws[28], p[24]); + word3_muladd(&w2, &w1, &w0, ws[29], p[23]); + word3_muladd(&w2, &w1, &w0, ws[30], p[22]); + word3_muladd(&w2, &w1, &w0, ws[31], p[21]); + word3_add(&w2, &w1, &w0, z[52]); + ws[20] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[22], p[31]); + word3_muladd(&w2, &w1, &w0, ws[23], p[30]); + word3_muladd(&w2, &w1, &w0, ws[24], p[29]); + word3_muladd(&w2, &w1, &w0, ws[25], p[28]); + word3_muladd(&w2, &w1, &w0, ws[26], p[27]); + word3_muladd(&w2, &w1, &w0, ws[27], p[26]); + word3_muladd(&w2, &w1, &w0, ws[28], p[25]); + word3_muladd(&w2, &w1, &w0, ws[29], p[24]); + word3_muladd(&w2, &w1, &w0, ws[30], p[23]); + word3_muladd(&w2, &w1, &w0, ws[31], p[22]); + word3_add(&w2, &w1, &w0, z[53]); + ws[21] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[23], p[31]); + word3_muladd(&w2, &w1, &w0, ws[24], p[30]); + word3_muladd(&w2, &w1, &w0, ws[25], p[29]); + word3_muladd(&w2, &w1, &w0, ws[26], p[28]); + word3_muladd(&w2, &w1, &w0, ws[27], p[27]); + word3_muladd(&w2, &w1, &w0, ws[28], p[26]); + word3_muladd(&w2, &w1, &w0, ws[29], p[25]); + word3_muladd(&w2, &w1, &w0, ws[30], p[24]); + word3_muladd(&w2, &w1, &w0, ws[31], p[23]); + word3_add(&w2, &w1, &w0, z[54]); + ws[22] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[24], p[31]); + word3_muladd(&w2, &w1, &w0, ws[25], p[30]); + word3_muladd(&w2, &w1, &w0, ws[26], p[29]); + word3_muladd(&w2, &w1, &w0, ws[27], p[28]); + word3_muladd(&w2, &w1, &w0, ws[28], p[27]); + word3_muladd(&w2, &w1, &w0, ws[29], p[26]); + word3_muladd(&w2, &w1, &w0, ws[30], p[25]); + word3_muladd(&w2, &w1, &w0, ws[31], p[24]); + word3_add(&w2, &w1, &w0, z[55]); + ws[23] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[25], p[31]); + word3_muladd(&w2, &w1, &w0, ws[26], p[30]); + word3_muladd(&w2, &w1, &w0, ws[27], p[29]); + word3_muladd(&w2, &w1, &w0, ws[28], p[28]); + word3_muladd(&w2, &w1, &w0, ws[29], p[27]); + word3_muladd(&w2, &w1, &w0, ws[30], p[26]); + word3_muladd(&w2, &w1, &w0, ws[31], p[25]); + word3_add(&w2, &w1, &w0, z[56]); + ws[24] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[26], p[31]); + word3_muladd(&w2, &w1, &w0, ws[27], p[30]); + word3_muladd(&w2, &w1, &w0, ws[28], p[29]); + word3_muladd(&w2, &w1, &w0, ws[29], p[28]); + word3_muladd(&w2, &w1, &w0, ws[30], p[27]); + word3_muladd(&w2, &w1, &w0, ws[31], p[26]); + word3_add(&w2, &w1, &w0, z[57]); + ws[25] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[27], p[31]); + word3_muladd(&w2, &w1, &w0, ws[28], p[30]); + word3_muladd(&w2, &w1, &w0, ws[29], p[29]); + word3_muladd(&w2, &w1, &w0, ws[30], p[28]); + word3_muladd(&w2, &w1, &w0, ws[31], p[27]); + word3_add(&w2, &w1, &w0, z[58]); + ws[26] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[28], p[31]); + word3_muladd(&w2, &w1, &w0, ws[29], p[30]); + word3_muladd(&w2, &w1, &w0, ws[30], p[29]); + word3_muladd(&w2, &w1, &w0, ws[31], p[28]); + word3_add(&w2, &w1, &w0, z[59]); + ws[27] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[29], p[31]); + word3_muladd(&w2, &w1, &w0, ws[30], p[30]); + word3_muladd(&w2, &w1, &w0, ws[31], p[29]); + word3_add(&w2, &w1, &w0, z[60]); + ws[28] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[30], p[31]); + word3_muladd(&w2, &w1, &w0, ws[31], p[30]); + word3_add(&w2, &w1, &w0, z[61]); + ws[29] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_muladd(&w2, &w1, &w0, ws[31], p[31]); + word3_add(&w2, &w1, &w0, z[62]); + ws[30] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[63]); + ws[31] = w0; + w0 = w1; w1 = w2; w2 = 0; + word3_add(&w2, &w1, &w0, z[65]); + ws[32] = w0; + ws[33] = w1; + word borrow = bigint_sub3(ws + 32 + 1, ws, 32 + 1, p, 32); + CT::conditional_copy_mem(borrow, z, ws, ws + 33, 33); + clear_mem(z + 32, 2*(32+1) - 32); + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/curve_nistp.h b/comm/third_party/botan/src/lib/math/numbertheory/curve_nistp.h new file mode 100644 index 0000000000..19d1bd2566 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/curve_nistp.h @@ -0,0 +1,49 @@ +/* +* Arithmetic operations specialized for NIST ECC primes +* (C) 2014,2015 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_NIST_PRIMES_H_ +#define BOTAN_NIST_PRIMES_H_ + +#include <botan/bigint.h> + +BOTAN_FUTURE_INTERNAL_HEADER(curve_nistp.h) + +namespace Botan { + +/** +* NIST Prime reduction functions. +* +* Reduces the value in place +* +* ws is a workspace function which is used as a temporary, +* and will be resized as needed. +*/ +BOTAN_PUBLIC_API(2,0) const BigInt& prime_p521(); +BOTAN_PUBLIC_API(2,0) void redc_p521(BigInt& x, secure_vector<word>& ws); + +/* +Previously this macro indicated if the P-{192,224,256,384} reducers +were available. Now they are always enabled and this macro has no meaning. +The define will be removed in a future major release. +*/ +#define BOTAN_HAS_NIST_PRIME_REDUCERS_W32 + +BOTAN_PUBLIC_API(2,0) const BigInt& prime_p384(); +BOTAN_PUBLIC_API(2,0) void redc_p384(BigInt& x, secure_vector<word>& ws); + +BOTAN_PUBLIC_API(2,0) const BigInt& prime_p256(); +BOTAN_PUBLIC_API(2,0) void redc_p256(BigInt& x, secure_vector<word>& ws); + +BOTAN_PUBLIC_API(2,0) const BigInt& prime_p224(); +BOTAN_PUBLIC_API(2,0) void redc_p224(BigInt& x, secure_vector<word>& ws); + +BOTAN_PUBLIC_API(2,0) const BigInt& prime_p192(); +BOTAN_PUBLIC_API(2,0) void redc_p192(BigInt& x, secure_vector<word>& ws); + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/dsa_gen.cpp b/comm/third_party/botan/src/lib/math/numbertheory/dsa_gen.cpp new file mode 100644 index 0000000000..a5efbc2662 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/dsa_gen.cpp @@ -0,0 +1,136 @@ +/* +* DSA Parameter Generation +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> +#include <botan/hash.h> +#include <botan/reducer.h> +#include <botan/rng.h> + +namespace Botan { + +namespace { + +/* +* Check if this size is allowed by FIPS 186-3 +*/ +bool fips186_3_valid_size(size_t pbits, size_t qbits) + { + if(qbits == 160) + return (pbits == 1024); + + if(qbits == 224) + return (pbits == 2048); + + if(qbits == 256) + return (pbits == 2048 || pbits == 3072); + + return false; + } + +} + +/* +* Attempt DSA prime generation with given seed +*/ +bool generate_dsa_primes(RandomNumberGenerator& rng, + BigInt& p, BigInt& q, + size_t pbits, size_t qbits, + const std::vector<uint8_t>& seed_c, + size_t offset) + { + if(!fips186_3_valid_size(pbits, qbits)) + throw Invalid_Argument( + "FIPS 186-3 does not allow DSA domain parameters of " + + std::to_string(pbits) + "/" + std::to_string(qbits) + " bits long"); + + if(seed_c.size() * 8 < qbits) + throw Invalid_Argument( + "Generating a DSA parameter set with a " + std::to_string(qbits) + + " bit long q requires a seed at least as many bits long"); + + const std::string hash_name = "SHA-" + std::to_string(qbits); + std::unique_ptr<HashFunction> hash(HashFunction::create_or_throw(hash_name)); + + const size_t HASH_SIZE = hash->output_length(); + + class Seed final + { + public: + explicit Seed(const std::vector<uint8_t>& s) : m_seed(s) {} + + const std::vector<uint8_t>& value() const { return m_seed; } + + Seed& operator++() + { + for(size_t j = m_seed.size(); j > 0; --j) + if(++m_seed[j-1]) + break; + return (*this); + } + private: + std::vector<uint8_t> m_seed; + }; + + Seed seed(seed_c); + + q.binary_decode(hash->process(seed.value())); + q.set_bit(qbits-1); + q.set_bit(0); + + if(!is_prime(q, rng, 128, true)) + return false; + + const size_t n = (pbits-1) / (HASH_SIZE * 8), + b = (pbits-1) % (HASH_SIZE * 8); + + BigInt X; + std::vector<uint8_t> V(HASH_SIZE * (n+1)); + + Modular_Reducer mod_2q(2*q); + + for(size_t j = 0; j != 4*pbits; ++j) + { + for(size_t k = 0; k <= n; ++k) + { + ++seed; + hash->update(seed.value()); + hash->final(&V[HASH_SIZE * (n-k)]); + } + + if(j >= offset) + { + X.binary_decode(&V[HASH_SIZE - 1 - b/8], + V.size() - (HASH_SIZE - 1 - b/8)); + X.set_bit(pbits-1); + + p = X - (mod_2q.reduce(X) - 1); + + if(p.bits() == pbits && is_prime(p, rng, 128, true)) + return true; + } + } + return false; + } + +/* +* Generate DSA Primes +*/ +std::vector<uint8_t> generate_dsa_primes(RandomNumberGenerator& rng, + BigInt& p, BigInt& q, + size_t pbits, size_t qbits) + { + while(true) + { + std::vector<uint8_t> seed(qbits / 8); + rng.randomize(seed.data(), seed.size()); + + if(generate_dsa_primes(rng, p, q, pbits, qbits, seed)) + return seed; + } + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/info.txt b/comm/third_party/botan/src/lib/math/numbertheory/info.txt new file mode 100644 index 0000000000..4b241c1208 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/info.txt @@ -0,0 +1,22 @@ +<defines> +NUMBERTHEORY -> 20131128 +</defines> + +<header:public> +curve_nistp.h +numthry.h +pow_mod.h +reducer.h +monty.h +</header:public> + +<header:internal> +primality.h +monty_exp.h +</header:internal> + +<requires> +bigint +hash +rng +</requires> diff --git a/comm/third_party/botan/src/lib/math/numbertheory/jacobi.cpp b/comm/third_party/botan/src/lib/math/numbertheory/jacobi.cpp new file mode 100644 index 0000000000..284fc2b204 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/jacobi.cpp @@ -0,0 +1,52 @@ +/* +* Jacobi Function +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> + +namespace Botan { + +/* +* Calculate the Jacobi symbol +*/ +int32_t jacobi(const BigInt& a, const BigInt& n) + { + if(n.is_even() || n < 2) + throw Invalid_Argument("jacobi: second argument must be odd and > 1"); + + BigInt x = a % n; + BigInt y = n; + int32_t J = 1; + + while(y > 1) + { + x %= y; + if(x > y / 2) + { + x = y - x; + if(y % 4 == 3) + J = -J; + } + if(x.is_zero()) + return 0; + + size_t shifts = low_zero_bits(x); + x >>= shifts; + if(shifts % 2) + { + word y_mod_8 = y % 8; + if(y_mod_8 == 3 || y_mod_8 == 5) + J = -J; + } + + if(x % 4 == 3 && y % 4 == 3) + J = -J; + std::swap(x, y); + } + return J; + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/make_prm.cpp b/comm/third_party/botan/src/lib/math/numbertheory/make_prm.cpp new file mode 100644 index 0000000000..404e301046 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/make_prm.cpp @@ -0,0 +1,293 @@ +/* +* Prime Generation +* (C) 1999-2007,2018,2019 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> +#include <botan/rng.h> +#include <botan/internal/bit_ops.h> +#include <botan/loadstor.h> +#include <botan/reducer.h> +#include <botan/internal/primality.h> +#include <algorithm> + +namespace Botan { + +namespace { + +class Prime_Sieve final + { + public: + Prime_Sieve(const BigInt& init_value, size_t sieve_size) : + m_sieve(std::min(sieve_size, PRIME_TABLE_SIZE)) + { + for(size_t i = 0; i != m_sieve.size(); ++i) + m_sieve[i] = static_cast<uint16_t>(init_value % PRIMES[i]); + } + + void step(word increment) + { + for(size_t i = 0; i != m_sieve.size(); ++i) + { + m_sieve[i] = (m_sieve[i] + increment) % PRIMES[i]; + } + } + + bool passes(bool check_2p1 = false) const + { + for(size_t i = 0; i != m_sieve.size(); ++i) + { + /* + In this case, p is a multiple of PRIMES[i] + */ + if(m_sieve[i] == 0) + return false; + + if(check_2p1) + { + /* + In this case, 2*p+1 will be a multiple of PRIMES[i] + + So if potentially generating a safe prime, we want to + avoid this value because 2*p+1 will certainly not be prime. + + See "Safe Prime Generation with a Combined Sieve" M. Wiener + https://eprint.iacr.org/2003/186.pdf + */ + if(m_sieve[i] == (PRIMES[i] - 1) / 2) + return false; + } + } + + return true; + } + + private: + std::vector<uint16_t> m_sieve; + }; + +} + + +/* +* Generate a random prime +*/ +BigInt random_prime(RandomNumberGenerator& rng, + size_t bits, const BigInt& coprime, + size_t equiv, size_t modulo, + size_t prob) + { + if(bits <= 1) + { + throw Invalid_Argument("random_prime: Can't make a prime of " + + std::to_string(bits) + " bits"); + } + if(coprime.is_negative() || (!coprime.is_zero() && coprime.is_even()) || coprime.bits() >= bits) + { + throw Invalid_Argument("random_prime: invalid coprime"); + } + if(modulo == 0) + { + throw Invalid_Argument("random_prime: Invalid modulo value"); + } + + equiv %= modulo; + + if(equiv == 0) + throw Invalid_Argument("random_prime Invalid value for equiv/modulo"); + + // Handle small values: + + if(bits <= 16) + { + if(equiv != 1 || modulo != 2 || coprime != 0) + throw Not_Implemented("random_prime equiv/modulo/coprime options not usable for small primes"); + + if(bits == 2) + { + return ((rng.next_byte() % 2) ? 2 : 3); + } + else if(bits == 3) + { + return ((rng.next_byte() % 2) ? 5 : 7); + } + else if(bits == 4) + { + return ((rng.next_byte() % 2) ? 11 : 13); + } + else + { + for(;;) + { + // This is slightly biased, but for small primes it does not seem to matter + uint8_t b[4]; + rng.randomize(b, 4); + const size_t idx = load_le<uint32_t>(b, 0) % PRIME_TABLE_SIZE; + const uint16_t small_prime = PRIMES[idx]; + + if(high_bit(small_prime) == bits) + return small_prime; + } + } + } + + const size_t MAX_ATTEMPTS = 32*1024; + + const size_t mr_trials = miller_rabin_test_iterations(bits, prob, true); + + while(true) + { + BigInt p(rng, bits); + + // Force lowest and two top bits on + p.set_bit(bits - 1); + p.set_bit(bits - 2); + p.set_bit(0); + + // Force p to be equal to equiv mod modulo + p += (modulo - (p % modulo)) + equiv; + + Prime_Sieve sieve(p, bits); + + for(size_t attempt = 0; attempt <= MAX_ATTEMPTS; ++attempt) + { + p += modulo; + + sieve.step(modulo); + + // p can be even if modulo is odd, continue on in that case + if(p.is_even() || sieve.passes(true) == false) + continue; + + Modular_Reducer mod_p(p); + + if(coprime > 1) + { + /* + First do a single M-R iteration to quickly elimate most non-primes, + before doing the coprimality check which is expensive + */ + if(is_miller_rabin_probable_prime(p, mod_p, rng, 1) == false) + continue; + + /* + * Check if p - 1 and coprime are relatively prime, using gcd. + * The gcd computation is const-time + */ + if(gcd(p - 1, coprime) > 1) + continue; + } + + if(p.bits() > bits) + break; + + if(is_miller_rabin_probable_prime(p, mod_p, rng, mr_trials) == false) + continue; + + if(prob > 32 && !is_lucas_probable_prime(p, mod_p)) + continue; + + return p; + } + } + } + +BigInt generate_rsa_prime(RandomNumberGenerator& keygen_rng, + RandomNumberGenerator& prime_test_rng, + size_t bits, + const BigInt& coprime, + size_t prob) + { + if(bits < 512) + throw Invalid_Argument("generate_rsa_prime bits too small"); + + /* + * The restriction on coprime <= 64 bits is arbitrary but generally speaking + * very large RSA public exponents are a bad idea both for performance and due + * to attacks on small d. + */ + if(coprime <= 1 || coprime.is_even() || coprime.bits() > 64) + throw Invalid_Argument("generate_rsa_prime coprime must be small odd positive integer"); + + const size_t MAX_ATTEMPTS = 32*1024; + + const size_t mr_trials = miller_rabin_test_iterations(bits, prob, true); + + while(true) + { + BigInt p(keygen_rng, bits); + + // Force high two bits so multiplication always results in expected n bit integer + p.set_bit(bits - 1); + p.set_bit(bits - 2); + p.set_bit(0); + + const word step = 2; + + Prime_Sieve sieve(p, bits); + + for(size_t attempt = 0; attempt <= MAX_ATTEMPTS; ++attempt) + { + p += step; + + sieve.step(step); + + if(sieve.passes() == false) + continue; + + Modular_Reducer mod_p(p); + + /* + * Do a single primality test first before checking coprimality, since + * currently a single Miller-Rabin test is faster than computing gcd, + * and this eliminates almost all wasted gcd computations. + */ + if(is_miller_rabin_probable_prime(p, mod_p, prime_test_rng, 1) == false) + continue; + + /* + * Check if p - 1 and coprime are relatively prime. + */ + if(gcd(p - 1, coprime) > 1) + continue; + + if(p.bits() > bits) + break; + + if(is_miller_rabin_probable_prime(p, mod_p, prime_test_rng, mr_trials) == true) + return p; + } + } + } + +/* +* Generate a random safe prime +*/ +BigInt random_safe_prime(RandomNumberGenerator& rng, size_t bits) + { + if(bits <= 64) + throw Invalid_Argument("random_safe_prime: Can't make a prime of " + + std::to_string(bits) + " bits"); + + const size_t error_bound = 128; + + BigInt q, p; + for(;;) + { + /* + Generate q == 2 (mod 3), since otherwise [in the case of q == 1 (mod 3)], + 2*q+1 == 3 (mod 3) and so certainly not prime. + */ + q = random_prime(rng, bits - 1, 0, 2, 3, error_bound); + p = (q << 1) + 1; + + if(is_prime(p, rng, error_bound, true)) + { + return p; + } + } + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/mod_inv.cpp b/comm/third_party/botan/src/lib/math/numbertheory/mod_inv.cpp new file mode 100644 index 0000000000..ec3bb33f00 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/mod_inv.cpp @@ -0,0 +1,356 @@ +/* +* (C) 1999-2011,2016,2018,2019,2020 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> +#include <botan/divide.h> +#include <botan/internal/ct_utils.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/rounding.h> + +namespace Botan { + +/* +Sets result to a^-1 * 2^k mod a +with n <= k <= 2n +Returns k + +"The Montgomery Modular Inverse - Revisited" Çetin Koç, E. Savas +https://citeseerx.ist.psu.edu/viewdoc/citations?doi=10.1.1.75.8377 + +A const time implementation of this algorithm is described in +"Constant Time Modular Inversion" Joppe W. Bos +http://www.joppebos.com/files/CTInversion.pdf +*/ +size_t almost_montgomery_inverse(BigInt& result, + const BigInt& a, + const BigInt& p) + { + size_t k = 0; + + BigInt u = p, v = a, r = 0, s = 1; + + while(v > 0) + { + if(u.is_even()) + { + u >>= 1; + s <<= 1; + } + else if(v.is_even()) + { + v >>= 1; + r <<= 1; + } + else if(u > v) + { + u -= v; + u >>= 1; + r += s; + s <<= 1; + } + else + { + v -= u; + v >>= 1; + s += r; + r <<= 1; + } + + ++k; + } + + if(r >= p) + { + r -= p; + } + + result = p - r; + + return k; + } + +BigInt normalized_montgomery_inverse(const BigInt& a, const BigInt& p) + { + BigInt r; + size_t k = almost_montgomery_inverse(r, a, p); + + for(size_t i = 0; i != k; ++i) + { + if(r.is_odd()) + r += p; + r >>= 1; + } + + return r; + } + +namespace { + +BigInt inverse_mod_odd_modulus(const BigInt& n, const BigInt& mod) + { + // Caller should assure these preconditions: + BOTAN_DEBUG_ASSERT(n.is_positive()); + BOTAN_DEBUG_ASSERT(mod.is_positive()); + BOTAN_DEBUG_ASSERT(n < mod); + BOTAN_DEBUG_ASSERT(mod >= 3 && mod.is_odd()); + + /* + This uses a modular inversion algorithm designed by Niels Möller + and implemented in Nettle. The same algorithm was later also + adapted to GMP in mpn_sec_invert. + + It can be easily implemented in a way that does not depend on + secret branches or memory lookups, providing resistance against + some forms of side channel attack. + + There is also a description of the algorithm in Appendix 5 of "Fast + Software Polynomial Multiplication on ARM Processors using the NEON Engine" + by Danilo Câmara, Conrado P. L. Gouvêa, Julio López, and Ricardo + Dahab in LNCS 8182 + https://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf + + Thanks to Niels for creating the algorithm, explaining some things + about it, and the reference to the paper. + */ + + const size_t mod_words = mod.sig_words(); + BOTAN_ASSERT(mod_words > 0, "Not empty"); + + secure_vector<word> tmp_mem(5*mod_words); + + word* v_w = &tmp_mem[0]; + word* u_w = &tmp_mem[1*mod_words]; + word* b_w = &tmp_mem[2*mod_words]; + word* a_w = &tmp_mem[3*mod_words]; + word* mp1o2 = &tmp_mem[4*mod_words]; + + CT::poison(tmp_mem.data(), tmp_mem.size()); + + copy_mem(a_w, n.data(), std::min(n.size(), mod_words)); + copy_mem(b_w, mod.data(), std::min(mod.size(), mod_words)); + u_w[0] = 1; + // v_w = 0 + + // compute (mod + 1) / 2 which [because mod is odd] is equal to + // (mod / 2) + 1 + copy_mem(mp1o2, mod.data(), std::min(mod.size(), mod_words)); + bigint_shr1(mp1o2, mod_words, 0, 1); + word carry = bigint_add2_nc(mp1o2, mod_words, u_w, 1); + BOTAN_ASSERT_NOMSG(carry == 0); + + // Only n.bits() + mod.bits() iterations are required, but avoid leaking the size of n + const size_t execs = 2 * mod.bits(); + + for(size_t i = 0; i != execs; ++i) + { + const word odd_a = a_w[0] & 1; + + //if(odd_a) a -= b + word underflow = bigint_cnd_sub(odd_a, a_w, b_w, mod_words); + + //if(underflow) { b -= a; a = abs(a); swap(u, v); } + bigint_cnd_add(underflow, b_w, a_w, mod_words); + bigint_cnd_abs(underflow, a_w, mod_words); + bigint_cnd_swap(underflow, u_w, v_w, mod_words); + + // a >>= 1 + bigint_shr1(a_w, mod_words, 0, 1); + + //if(odd_a) u -= v; + word borrow = bigint_cnd_sub(odd_a, u_w, v_w, mod_words); + + // if(borrow) u += p + bigint_cnd_add(borrow, u_w, mod.data(), mod_words); + + const word odd_u = u_w[0] & 1; + + // u >>= 1 + bigint_shr1(u_w, mod_words, 0, 1); + + //if(odd_u) u += mp1o2; + bigint_cnd_add(odd_u, u_w, mp1o2, mod_words); + } + + auto a_is_0 = CT::Mask<word>::set(); + for(size_t i = 0; i != mod_words; ++i) + a_is_0 &= CT::Mask<word>::is_zero(a_w[i]); + + auto b_is_1 = CT::Mask<word>::is_equal(b_w[0], 1); + for(size_t i = 1; i != mod_words; ++i) + b_is_1 &= CT::Mask<word>::is_zero(b_w[i]); + + BOTAN_ASSERT(a_is_0.is_set(), "A is zero"); + + // if b != 1 then gcd(n,mod) > 1 and inverse does not exist + // in which case zero out the result to indicate this + (~b_is_1).if_set_zero_out(v_w, mod_words); + + /* + * We've placed the result in the lowest words of the temp buffer. + * So just clear out the other values and then give that buffer to a + * BigInt. + */ + clear_mem(&tmp_mem[mod_words], 4*mod_words); + + CT::unpoison(tmp_mem.data(), tmp_mem.size()); + + BigInt r; + r.swap_reg(tmp_mem); + return r; + } + +BigInt inverse_mod_pow2(const BigInt& a1, size_t k) + { + /* + * From "A New Algorithm for Inversion mod p^k" by Çetin Kaya Koç + * https://eprint.iacr.org/2017/411.pdf sections 5 and 7. + */ + + if(a1.is_even()) + return 0; + + BigInt a = a1; + a.mask_bits(k); + + BigInt b = 1; + BigInt X = 0; + BigInt newb; + + const size_t a_words = a.sig_words(); + + X.grow_to(round_up(k, BOTAN_MP_WORD_BITS) / BOTAN_MP_WORD_BITS); + b.grow_to(a_words); + + /* + Hide the exact value of k. k is anyway known to word length + granularity because of the length of a, so no point in doing more + than this. + */ + const size_t iter = round_up(k, BOTAN_MP_WORD_BITS); + + for(size_t i = 0; i != iter; ++i) + { + const bool b0 = b.get_bit(0); + X.conditionally_set_bit(i, b0); + newb = b - a; + b.ct_cond_assign(b0, newb); + b >>= 1; + } + + X.mask_bits(k); + X.const_time_unpoison(); + return X; + } + +} + +BigInt inverse_mod(const BigInt& n, const BigInt& mod) + { + if(mod.is_zero()) + throw BigInt::DivideByZero(); + if(mod.is_negative() || n.is_negative()) + throw Invalid_Argument("inverse_mod: arguments must be non-negative"); + if(n.is_zero() || (n.is_even() && mod.is_even())) + return 0; + + if(mod.is_odd()) + { + /* + Fastpath for common case. This leaks information if n > mod + but we don't guarantee const time behavior in that case. + */ + if(n < mod) + return inverse_mod_odd_modulus(n, mod); + else + return inverse_mod_odd_modulus(ct_modulo(n, mod), mod); + } + + const size_t mod_lz = low_zero_bits(mod); + BOTAN_ASSERT_NOMSG(mod_lz > 0); + const size_t mod_bits = mod.bits(); + BOTAN_ASSERT_NOMSG(mod_bits > mod_lz); + + if(mod_lz == mod_bits - 1) + { + // In this case we are performing an inversion modulo 2^k + return inverse_mod_pow2(n, mod_lz); + } + + /* + * In this case we are performing an inversion modulo 2^k*o for + * some k > 1 and some odd (not necessarily prime) integer. + * Compute the inversions modulo 2^k and modulo o, then combine them + * using CRT, which is possible because 2^k and o are relatively prime. + */ + + const BigInt o = mod >> mod_lz; + const BigInt n_redc = ct_modulo(n, o); + const BigInt inv_o = inverse_mod_odd_modulus(n_redc, o); + const BigInt inv_2k = inverse_mod_pow2(n, mod_lz); + + // No modular inverse in this case: + if(inv_o == 0 || inv_2k == 0) + return 0; + + const BigInt m2k = BigInt::power_of_2(mod_lz); + // Compute the CRT parameter + const BigInt c = inverse_mod_pow2(o, mod_lz); + + // Compute h = c*(inv_2k-inv_o) mod 2^k + BigInt h = c * (inv_2k - inv_o); + const bool h_neg = h.is_negative(); + h.set_sign(BigInt::Positive); + h.mask_bits(mod_lz); + const bool h_nonzero = h.is_nonzero(); + h.ct_cond_assign(h_nonzero && h_neg, m2k - h); + + // Return result inv_o + h * o + h *= o; + h += inv_o; + return h; + } + +// Deprecated forwarding functions: +BigInt inverse_euclid(const BigInt& x, const BigInt& modulus) + { + return inverse_mod(x, modulus); + } + +BigInt ct_inverse_mod_odd_modulus(const BigInt& n, const BigInt& mod) + { + return inverse_mod_odd_modulus(n, mod); + } + +word monty_inverse(word a) + { + if(a % 2 == 0) + throw Invalid_Argument("monty_inverse only valid for odd integers"); + + /* + * From "A New Algorithm for Inversion mod p^k" by Çetin Kaya Koç + * https://eprint.iacr.org/2017/411.pdf sections 5 and 7. + */ + + word b = 1; + word r = 0; + + for(size_t i = 0; i != BOTAN_MP_WORD_BITS; ++i) + { + const word bi = b % 2; + r >>= 1; + r += bi << (BOTAN_MP_WORD_BITS - 1); + + b -= a * bi; + b >>= 1; + } + + // Now invert in addition space + r = (MP_WORD_MAX - r) + 1; + + return r; + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/monty.cpp b/comm/third_party/botan/src/lib/math/numbertheory/monty.cpp new file mode 100644 index 0000000000..ca5eb73dfd --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/monty.cpp @@ -0,0 +1,444 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/monty.h> +#include <botan/reducer.h> +#include <botan/internal/mp_core.h> + +namespace Botan { + +Montgomery_Params::Montgomery_Params(const BigInt& p, + const Modular_Reducer& mod_p) + { + if(p.is_even() || p < 3) + throw Invalid_Argument("Montgomery_Params invalid modulus"); + + m_p = p; + m_p_words = m_p.sig_words(); + m_p_dash = monty_inverse(m_p.word_at(0)); + + const BigInt r = BigInt::power_of_2(m_p_words * BOTAN_MP_WORD_BITS); + + m_r1 = mod_p.reduce(r); + m_r2 = mod_p.square(m_r1); + m_r3 = mod_p.multiply(m_r1, m_r2); + } + +Montgomery_Params::Montgomery_Params(const BigInt& p) + { + + if(p.is_negative() || p.is_even()) + throw Invalid_Argument("Montgomery_Params invalid modulus"); + + m_p = p; + m_p_words = m_p.sig_words(); + m_p_dash = monty_inverse(m_p.word_at(0)); + + const BigInt r = BigInt::power_of_2(m_p_words * BOTAN_MP_WORD_BITS); + + // It might be faster to use ct_modulo here vs setting up Barrett reduction? + Modular_Reducer mod_p(p); + + m_r1 = mod_p.reduce(r); + m_r2 = mod_p.square(m_r1); + m_r3 = mod_p.multiply(m_r1, m_r2); + } + +BigInt Montgomery_Params::inv_mod_p(const BigInt& x) const + { + // TODO use Montgomery inverse here? + return inverse_mod(x, p()); + } + +BigInt Montgomery_Params::redc(const BigInt& x, secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + + if(ws.size() < output_size) + ws.resize(output_size); + + BigInt z = x; + z.grow_to(output_size); + + bigint_monty_redc(z.mutable_data(), + m_p.data(), m_p_words, m_p_dash, + ws.data(), ws.size()); + + return z; + } + +BigInt Montgomery_Params::mul(const BigInt& x, const BigInt& y, + secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + + if(ws.size() < output_size) + ws.resize(output_size); + + BOTAN_DEBUG_ASSERT(x.sig_words() <= m_p_words); + BOTAN_DEBUG_ASSERT(y.sig_words() <= m_p_words); + + BigInt z(BigInt::Positive, output_size); + bigint_mul(z.mutable_data(), z.size(), + x.data(), x.size(), std::min(m_p_words, x.size()), + y.data(), y.size(), std::min(m_p_words, y.size()), + ws.data(), ws.size()); + + bigint_monty_redc(z.mutable_data(), + m_p.data(), m_p_words, m_p_dash, + ws.data(), ws.size()); + + return z; + } + +BigInt Montgomery_Params::mul(const BigInt& x, + const secure_vector<word>& y, + secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + if(ws.size() < output_size) + ws.resize(output_size); + BigInt z(BigInt::Positive, output_size); + + BOTAN_DEBUG_ASSERT(x.sig_words() <= m_p_words); + + bigint_mul(z.mutable_data(), z.size(), + x.data(), x.size(), std::min(m_p_words, x.size()), + y.data(), y.size(), std::min(m_p_words, y.size()), + ws.data(), ws.size()); + + bigint_monty_redc(z.mutable_data(), + m_p.data(), m_p_words, m_p_dash, + ws.data(), ws.size()); + + return z; + } + +void Montgomery_Params::mul_by(BigInt& x, + const secure_vector<word>& y, + secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + + if(ws.size() < 2*output_size) + ws.resize(2*output_size); + + word* z_data = &ws[0]; + word* ws_data = &ws[output_size]; + + BOTAN_DEBUG_ASSERT(x.sig_words() <= m_p_words); + + bigint_mul(z_data, output_size, + x.data(), x.size(), std::min(m_p_words, x.size()), + y.data(), y.size(), std::min(m_p_words, y.size()), + ws_data, output_size); + + bigint_monty_redc(z_data, + m_p.data(), m_p_words, m_p_dash, + ws_data, output_size); + + if(x.size() < output_size) + x.grow_to(output_size); + copy_mem(x.mutable_data(), z_data, output_size); + } + +void Montgomery_Params::mul_by(BigInt& x, + const BigInt& y, + secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + + if(ws.size() < 2*output_size) + ws.resize(2*output_size); + + word* z_data = &ws[0]; + word* ws_data = &ws[output_size]; + + BOTAN_DEBUG_ASSERT(x.sig_words() <= m_p_words); + + bigint_mul(z_data, output_size, + x.data(), x.size(), std::min(m_p_words, x.size()), + y.data(), y.size(), std::min(m_p_words, y.size()), + ws_data, output_size); + + bigint_monty_redc(z_data, + m_p.data(), m_p_words, m_p_dash, + ws_data, output_size); + + if(x.size() < output_size) + x.grow_to(output_size); + copy_mem(x.mutable_data(), z_data, output_size); + } + +BigInt Montgomery_Params::sqr(const BigInt& x, secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + + if(ws.size() < output_size) + ws.resize(output_size); + + BigInt z(BigInt::Positive, output_size); + + BOTAN_DEBUG_ASSERT(x.sig_words() <= m_p_words); + + bigint_sqr(z.mutable_data(), z.size(), + x.data(), x.size(), std::min(m_p_words, x.size()), + ws.data(), ws.size()); + + bigint_monty_redc(z.mutable_data(), + m_p.data(), m_p_words, m_p_dash, + ws.data(), ws.size()); + + return z; + } + +void Montgomery_Params::square_this(BigInt& x, + secure_vector<word>& ws) const + { + const size_t output_size = 2*m_p_words + 2; + + if(ws.size() < 2*output_size) + ws.resize(2*output_size); + + word* z_data = &ws[0]; + word* ws_data = &ws[output_size]; + + BOTAN_DEBUG_ASSERT(x.sig_words() <= m_p_words); + + bigint_sqr(z_data, output_size, + x.data(), x.size(), std::min(m_p_words, x.size()), + ws_data, output_size); + + bigint_monty_redc(z_data, + m_p.data(), m_p_words, m_p_dash, + ws_data, output_size); + + if(x.size() < output_size) + x.grow_to(output_size); + copy_mem(x.mutable_data(), z_data, output_size); + } + +Montgomery_Int::Montgomery_Int(const std::shared_ptr<const Montgomery_Params> params, + const BigInt& v, + bool redc_needed) : + m_params(params) + { + if(redc_needed == false) + { + m_v = v; + } + else + { + BOTAN_ASSERT_NOMSG(m_v < m_params->p()); + secure_vector<word> ws; + m_v = m_params->mul(v, m_params->R2(), ws); + } + } + +Montgomery_Int::Montgomery_Int(std::shared_ptr<const Montgomery_Params> params, + const uint8_t bits[], size_t len, + bool redc_needed) : + m_params(params), + m_v(bits, len) + { + if(redc_needed) + { + BOTAN_ASSERT_NOMSG(m_v < m_params->p()); + secure_vector<word> ws; + m_v = m_params->mul(m_v, m_params->R2(), ws); + } + } + +Montgomery_Int::Montgomery_Int(std::shared_ptr<const Montgomery_Params> params, + const word words[], size_t len, + bool redc_needed) : + m_params(params), + m_v(words, len) + { + if(redc_needed) + { + BOTAN_ASSERT_NOMSG(m_v < m_params->p()); + secure_vector<word> ws; + m_v = m_params->mul(m_v, m_params->R2(), ws); + } + } + +void Montgomery_Int::fix_size() + { + const size_t p_words = m_params->p_words(); + + if(m_v.sig_words() > p_words) + throw Internal_Error("Montgomery_Int::fix_size v too large"); + + m_v.grow_to(p_words); + } + +bool Montgomery_Int::operator==(const Montgomery_Int& other) const + { + return m_v == other.m_v && m_params->p() == other.m_params->p(); + } + +std::vector<uint8_t> Montgomery_Int::serialize() const + { + std::vector<uint8_t> v(size()); + BigInt::encode_1363(v.data(), v.size(), value()); + return v; + } + +size_t Montgomery_Int::size() const + { + return m_params->p().bytes(); + } + +bool Montgomery_Int::is_one() const + { + return m_v == m_params->R1(); + } + +bool Montgomery_Int::is_zero() const + { + return m_v.is_zero(); + } + +BigInt Montgomery_Int::value() const + { + secure_vector<word> ws; + return m_params->redc(m_v, ws); + } + +Montgomery_Int Montgomery_Int::operator+(const Montgomery_Int& other) const + { + secure_vector<word> ws; + BigInt z = m_v; + z.mod_add(other.m_v, m_params->p(), ws); + return Montgomery_Int(m_params, z, false); + } + +Montgomery_Int Montgomery_Int::operator-(const Montgomery_Int& other) const + { + secure_vector<word> ws; + BigInt z = m_v; + z.mod_sub(other.m_v, m_params->p(), ws); + return Montgomery_Int(m_params, z, false); + } + +Montgomery_Int& Montgomery_Int::operator+=(const Montgomery_Int& other) + { + secure_vector<word> ws; + return this->add(other, ws); + } + +Montgomery_Int& Montgomery_Int::add(const Montgomery_Int& other, secure_vector<word>& ws) + { + m_v.mod_add(other.m_v, m_params->p(), ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::operator-=(const Montgomery_Int& other) + { + secure_vector<word> ws; + return this->sub(other, ws); + } + +Montgomery_Int& Montgomery_Int::sub(const Montgomery_Int& other, secure_vector<word>& ws) + { + m_v.mod_sub(other.m_v, m_params->p(), ws); + return (*this); + } + +Montgomery_Int Montgomery_Int::operator*(const Montgomery_Int& other) const + { + secure_vector<word> ws; + return Montgomery_Int(m_params, m_params->mul(m_v, other.m_v, ws), false); + } + +Montgomery_Int Montgomery_Int::mul(const Montgomery_Int& other, + secure_vector<word>& ws) const + { + return Montgomery_Int(m_params, m_params->mul(m_v, other.m_v, ws), false); + } + +Montgomery_Int& Montgomery_Int::mul_by(const Montgomery_Int& other, + secure_vector<word>& ws) + { + m_params->mul_by(m_v, other.m_v, ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::mul_by(const secure_vector<word>& other, + secure_vector<word>& ws) + { + m_params->mul_by(m_v, other, ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::operator*=(const Montgomery_Int& other) + { + secure_vector<word> ws; + return mul_by(other, ws); + } + +Montgomery_Int& Montgomery_Int::operator*=(const secure_vector<word>& other) + { + secure_vector<word> ws; + return mul_by(other, ws); + } + +Montgomery_Int& Montgomery_Int::square_this_n_times(secure_vector<word>& ws, size_t n) + { + for(size_t i = 0; i != n; ++i) + m_params->square_this(m_v, ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::square_this(secure_vector<word>& ws) + { + m_params->square_this(m_v, ws); + return (*this); + } + +Montgomery_Int Montgomery_Int::square(secure_vector<word>& ws) const + { + return Montgomery_Int(m_params, m_params->sqr(m_v, ws), false); + } + +Montgomery_Int Montgomery_Int::multiplicative_inverse() const + { + secure_vector<word> ws; + const BigInt iv = m_params->mul(m_params->inv_mod_p(m_v), m_params->R3(), ws); + return Montgomery_Int(m_params, iv, false); + } + +Montgomery_Int Montgomery_Int::additive_inverse() const + { + return Montgomery_Int(m_params, m_params->p()) - (*this); + } + +Montgomery_Int& Montgomery_Int::mul_by_2(secure_vector<word>& ws) + { + m_v.mod_mul(2, m_params->p(), ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::mul_by_3(secure_vector<word>& ws) + { + m_v.mod_mul(3, m_params->p(), ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::mul_by_4(secure_vector<word>& ws) + { + m_v.mod_mul(4, m_params->p(), ws); + return (*this); + } + +Montgomery_Int& Montgomery_Int::mul_by_8(secure_vector<word>& ws) + { + m_v.mod_mul(8, m_params->p(), ws); + return (*this); + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/monty.h b/comm/third_party/botan/src/lib/math/numbertheory/monty.h new file mode 100644 index 0000000000..8e0cd342fb --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/monty.h @@ -0,0 +1,191 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MONTY_INT_H_ +#define BOTAN_MONTY_INT_H_ + +#include <botan/bigint.h> +BOTAN_FUTURE_INTERNAL_HEADER(monty.h) + +namespace Botan { + +class Modular_Reducer; + +class Montgomery_Params; + +/** +* The Montgomery representation of an integer +*/ +class BOTAN_UNSTABLE_API Montgomery_Int final + { + public: + /** + * Create a zero-initialized Montgomery_Int + */ + Montgomery_Int(std::shared_ptr<const Montgomery_Params> params) : m_params(params) {} + + /** + * Create a Montgomery_Int + */ + Montgomery_Int(std::shared_ptr<const Montgomery_Params> params, + const BigInt& v, + bool redc_needed = true); + + /** + * Create a Montgomery_Int + */ + Montgomery_Int(std::shared_ptr<const Montgomery_Params> params, + const uint8_t bits[], size_t len, + bool redc_needed = true); + + /** + * Create a Montgomery_Int + */ + Montgomery_Int(std::shared_ptr<const Montgomery_Params> params, + const word words[], size_t len, + bool redc_needed = true); + + bool operator==(const Montgomery_Int& other) const; + bool operator!=(const Montgomery_Int& other) const { return (m_v != other.m_v); } + + std::vector<uint8_t> serialize() const; + + size_t size() const; + bool is_one() const; + bool is_zero() const; + + void fix_size(); + + /** + * Return the value to normal mod-p space + */ + BigInt value() const; + + /** + * Return the Montgomery representation + */ + const BigInt& repr() const { return m_v; } + + Montgomery_Int operator+(const Montgomery_Int& other) const; + + Montgomery_Int operator-(const Montgomery_Int& other) const; + + Montgomery_Int& operator+=(const Montgomery_Int& other); + + Montgomery_Int& operator-=(const Montgomery_Int& other); + + Montgomery_Int operator*(const Montgomery_Int& other) const; + + Montgomery_Int& operator*=(const Montgomery_Int& other); + + Montgomery_Int& operator*=(const secure_vector<word>& other); + + Montgomery_Int& add(const Montgomery_Int& other, + secure_vector<word>& ws); + + Montgomery_Int& sub(const Montgomery_Int& other, + secure_vector<word>& ws); + + Montgomery_Int mul(const Montgomery_Int& other, + secure_vector<word>& ws) const; + + Montgomery_Int& mul_by(const Montgomery_Int& other, + secure_vector<word>& ws); + + Montgomery_Int& mul_by(const secure_vector<word>& other, + secure_vector<word>& ws); + + Montgomery_Int square(secure_vector<word>& ws) const; + + Montgomery_Int& square_this(secure_vector<word>& ws); + + Montgomery_Int& square_this_n_times(secure_vector<word>& ws, size_t n); + + Montgomery_Int multiplicative_inverse() const; + + Montgomery_Int additive_inverse() const; + + Montgomery_Int& mul_by_2(secure_vector<word>& ws); + + Montgomery_Int& mul_by_3(secure_vector<word>& ws); + + Montgomery_Int& mul_by_4(secure_vector<word>& ws); + + Montgomery_Int& mul_by_8(secure_vector<word>& ws); + + void const_time_poison() const { m_v.const_time_poison(); } + void const_time_unpoison() const { return m_v.const_time_unpoison(); } + + private: + std::shared_ptr<const Montgomery_Params> m_params; + BigInt m_v; + }; + +/** +* Parameters for Montgomery Reduction +*/ +class BOTAN_UNSTABLE_API Montgomery_Params final + { + public: + /** + * Initialize a set of Montgomery reduction parameters. These values + * can be shared by all values in a specific Montgomery domain. + */ + Montgomery_Params(const BigInt& p, const Modular_Reducer& mod_p); + + /** + * Initialize a set of Montgomery reduction parameters. These values + * can be shared by all values in a specific Montgomery domain. + */ + Montgomery_Params(const BigInt& p); + + const BigInt& p() const { return m_p; } + const BigInt& R1() const { return m_r1; } + const BigInt& R2() const { return m_r2; } + const BigInt& R3() const { return m_r3; } + + word p_dash() const { return m_p_dash; } + + size_t p_words() const { return m_p_words; } + + BigInt redc(const BigInt& x, + secure_vector<word>& ws) const; + + BigInt mul(const BigInt& x, + const BigInt& y, + secure_vector<word>& ws) const; + + BigInt mul(const BigInt& x, + const secure_vector<word>& y, + secure_vector<word>& ws) const; + + void mul_by(BigInt& x, + const secure_vector<word>& y, + secure_vector<word>& ws) const; + + void mul_by(BigInt& x, const BigInt& y, + secure_vector<word>& ws) const; + + BigInt sqr(const BigInt& x, + secure_vector<word>& ws) const; + + void square_this(BigInt& x, + secure_vector<word>& ws) const; + + BigInt inv_mod_p(const BigInt& x) const; + + private: + BigInt m_p; + BigInt m_r1; + BigInt m_r2; + BigInt m_r3; + word m_p_dash; + size_t m_p_words; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/monty_exp.cpp b/comm/third_party/botan/src/lib/math/numbertheory/monty_exp.cpp new file mode 100644 index 0000000000..02ae795cd9 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/monty_exp.cpp @@ -0,0 +1,254 @@ +/* +* Montgomery Exponentiation +* (C) 1999-2010,2012,2018 Jack Lloyd +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/internal/monty_exp.h> +#include <botan/internal/ct_utils.h> +#include <botan/internal/rounding.h> +#include <botan/numthry.h> +#include <botan/reducer.h> +#include <botan/monty.h> + +namespace Botan { + +class Montgomery_Exponentation_State + { + public: + Montgomery_Exponentation_State(std::shared_ptr<const Montgomery_Params> params, + const BigInt& g, + size_t window_bits, + bool const_time); + + BigInt exponentiation(const BigInt& k, size_t max_k_bits) const; + + BigInt exponentiation_vartime(const BigInt& k) const; + private: + std::shared_ptr<const Montgomery_Params> m_params; + std::vector<Montgomery_Int> m_g; + size_t m_window_bits; + bool m_const_time; + }; + +Montgomery_Exponentation_State::Montgomery_Exponentation_State(std::shared_ptr<const Montgomery_Params> params, + const BigInt& g, + size_t window_bits, + bool const_time) : + m_params(params), + m_window_bits(window_bits == 0 ? 4 : window_bits), + m_const_time(const_time) + { + BOTAN_ARG_CHECK(g < m_params->p(), "Montgomery base too big"); + + if(m_window_bits < 1 || m_window_bits > 12) // really even 8 is too large ... + throw Invalid_Argument("Invalid window bits for Montgomery exponentiation"); + + const size_t window_size = (static_cast<size_t>(1) << m_window_bits); + + m_g.reserve(window_size); + + m_g.push_back(Montgomery_Int(m_params, m_params->R1(), false)); + + m_g.push_back(Montgomery_Int(m_params, g)); + + for(size_t i = 2; i != window_size; ++i) + { + m_g.push_back(m_g[1] * m_g[i - 1]); + } + + // Resize each element to exactly p words + for(size_t i = 0; i != window_size; ++i) + { + m_g[i].fix_size(); + if(const_time) + m_g[i].const_time_poison(); + } + } + +namespace { + +void const_time_lookup(secure_vector<word>& output, + const std::vector<Montgomery_Int>& g, + size_t nibble) + { + BOTAN_ASSERT_NOMSG(g.size() % 2 == 0); // actually a power of 2 + + const size_t words = output.size(); + + clear_mem(output.data(), output.size()); + + for(size_t i = 0; i != g.size(); i += 2) + { + const secure_vector<word>& vec_0 = g[i ].repr().get_word_vector(); + const secure_vector<word>& vec_1 = g[i+1].repr().get_word_vector(); + + BOTAN_ASSERT_NOMSG(vec_0.size() >= words && vec_1.size() >= words); + + const auto mask_0 = CT::Mask<word>::is_equal(nibble, i); + const auto mask_1 = CT::Mask<word>::is_equal(nibble, i+1); + + for(size_t w = 0; w != words; ++w) + { + output[w] |= mask_0.if_set_return(vec_0[w]); + output[w] |= mask_1.if_set_return(vec_1[w]); + } + } + } + +} + +BigInt Montgomery_Exponentation_State::exponentiation(const BigInt& scalar, size_t max_k_bits) const + { + BOTAN_DEBUG_ASSERT(scalar.bits() <= max_k_bits); + // TODO add a const-time implementation of above assert and use it in release builds + + const size_t exp_nibbles = (max_k_bits + m_window_bits - 1) / m_window_bits; + + if(exp_nibbles == 0) + return 1; + + secure_vector<word> e_bits(m_params->p_words()); + secure_vector<word> ws; + + const_time_lookup(e_bits, m_g, scalar.get_substring(m_window_bits*(exp_nibbles-1), m_window_bits)); + Montgomery_Int x(m_params, e_bits.data(), e_bits.size(), false); + + for(size_t i = exp_nibbles - 1; i > 0; --i) + { + x.square_this_n_times(ws, m_window_bits); + const_time_lookup(e_bits, m_g, scalar.get_substring(m_window_bits*(i-1), m_window_bits)); + x.mul_by(e_bits, ws); + } + + x.const_time_unpoison(); + return x.value(); + } + +BigInt Montgomery_Exponentation_State::exponentiation_vartime(const BigInt& scalar) const + { + BOTAN_ASSERT_NOMSG(m_const_time == false); + + const size_t exp_nibbles = (scalar.bits() + m_window_bits - 1) / m_window_bits; + + secure_vector<word> ws; + + if(exp_nibbles == 0) + return 1; + + Montgomery_Int x = m_g[scalar.get_substring(m_window_bits*(exp_nibbles-1), m_window_bits)]; + + for(size_t i = exp_nibbles - 1; i > 0; --i) + { + x.square_this_n_times(ws, m_window_bits); + + const uint32_t nibble = scalar.get_substring(m_window_bits*(i-1), m_window_bits); + if(nibble > 0) + x.mul_by(m_g[nibble], ws); + } + + x.const_time_unpoison(); + return x.value(); + } + +std::shared_ptr<const Montgomery_Exponentation_State> +monty_precompute(std::shared_ptr<const Montgomery_Params> params, + const BigInt& g, + size_t window_bits, + bool const_time) + { + return std::make_shared<const Montgomery_Exponentation_State>(params, g, window_bits, const_time); + } + +BigInt monty_execute(const Montgomery_Exponentation_State& precomputed_state, + const BigInt& k, size_t max_k_bits) + { + return precomputed_state.exponentiation(k, max_k_bits); + } + +BigInt monty_execute_vartime(const Montgomery_Exponentation_State& precomputed_state, + const BigInt& k) + { + return precomputed_state.exponentiation_vartime(k); + } + +BigInt monty_multi_exp(std::shared_ptr<const Montgomery_Params> params_p, + const BigInt& x_bn, + const BigInt& z1, + const BigInt& y_bn, + const BigInt& z2) + { + if(z1.is_negative() || z2.is_negative()) + throw Invalid_Argument("multi_exponentiate exponents must be positive"); + + const size_t z_bits = round_up(std::max(z1.bits(), z2.bits()), 2); + + secure_vector<word> ws; + + const Montgomery_Int one(params_p, params_p->R1(), false); + //const Montgomery_Int one(params_p, 1); + + const Montgomery_Int x1(params_p, x_bn); + const Montgomery_Int x2 = x1.square(ws); + const Montgomery_Int x3 = x2.mul(x1, ws); + + const Montgomery_Int y1(params_p, y_bn); + const Montgomery_Int y2 = y1.square(ws); + const Montgomery_Int y3 = y2.mul(y1, ws); + + const Montgomery_Int y1x1 = y1.mul(x1, ws); + const Montgomery_Int y1x2 = y1.mul(x2, ws); + const Montgomery_Int y1x3 = y1.mul(x3, ws); + + const Montgomery_Int y2x1 = y2.mul(x1, ws); + const Montgomery_Int y2x2 = y2.mul(x2, ws); + const Montgomery_Int y2x3 = y2.mul(x3, ws); + + const Montgomery_Int y3x1 = y3.mul(x1, ws); + const Montgomery_Int y3x2 = y3.mul(x2, ws); + const Montgomery_Int y3x3 = y3.mul(x3, ws); + + const Montgomery_Int* M[16] = { + &one, + &x1, // 0001 + &x2, // 0010 + &x3, // 0011 + &y1, // 0100 + &y1x1, + &y1x2, + &y1x3, + &y2, // 1000 + &y2x1, + &y2x2, + &y2x3, + &y3, // 1100 + &y3x1, + &y3x2, + &y3x3 + }; + + Montgomery_Int H = one; + + for(size_t i = 0; i != z_bits; i += 2) + { + if(i > 0) + { + H.square_this(ws); + H.square_this(ws); + } + + const uint32_t z1_b = z1.get_substring(z_bits - i - 2, 2); + const uint32_t z2_b = z2.get_substring(z_bits - i - 2, 2); + + const uint32_t z12 = (4*z2_b) + z1_b; + + H.mul_by(*M[z12], ws); + } + + return H.value(); + } + +} + diff --git a/comm/third_party/botan/src/lib/math/numbertheory/monty_exp.h b/comm/third_party/botan/src/lib/math/numbertheory/monty_exp.h new file mode 100644 index 0000000000..632d7f7d6e --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/monty_exp.h @@ -0,0 +1,54 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MONTY_EXP_H_ +#define BOTAN_MONTY_EXP_H_ + +#include <memory> + +namespace Botan { + +class BigInt; +class Modular_Reducer; + +class Montgomery_Params; + +class Montgomery_Exponentation_State; + +/* +* Precompute for calculating values g^x mod p +*/ +std::shared_ptr<const Montgomery_Exponentation_State> +monty_precompute(std::shared_ptr<const Montgomery_Params> params_p, + const BigInt& g, + size_t window_bits, + bool const_time = true); + +/* +* Return g^k mod p +*/ +BigInt monty_execute(const Montgomery_Exponentation_State& precomputed_state, + const BigInt& k, size_t max_k_bits); + +/* +* Return g^k mod p taking variable time depending on k +* @warning only use this if k is public +*/ +BigInt monty_execute_vartime(const Montgomery_Exponentation_State& precomputed_state, + const BigInt& k); + +/** +* Return (x^z1 * y^z2) % p +*/ +BigInt monty_multi_exp(std::shared_ptr<const Montgomery_Params> params_p, + const BigInt& x, + const BigInt& z1, + const BigInt& y, + const BigInt& z2); + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/mp_numth.cpp b/comm/third_party/botan/src/lib/math/numbertheory/mp_numth.cpp new file mode 100644 index 0000000000..eef6419965 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/mp_numth.cpp @@ -0,0 +1,84 @@ +/* +* Fused and Important MP Algorithms +* (C) 1999-2007 Jack Lloyd +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/rounding.h> +#include <algorithm> + +namespace Botan { + +/* +* Square a BigInt +*/ +BigInt square(const BigInt& x) + { + BigInt z = x; + secure_vector<word> ws; + z.square(ws); + return z; + } + +/* +* Multiply-Add Operation +*/ +BigInt mul_add(const BigInt& a, const BigInt& b, const BigInt& c) + { + if(c.is_negative()) + throw Invalid_Argument("mul_add: Third argument must be > 0"); + + BigInt::Sign sign = BigInt::Positive; + if(a.sign() != b.sign()) + sign = BigInt::Negative; + + const size_t a_sw = a.sig_words(); + const size_t b_sw = b.sig_words(); + const size_t c_sw = c.sig_words(); + + BigInt r(sign, std::max(a_sw + b_sw, c_sw) + 1); + secure_vector<word> workspace(r.size()); + + bigint_mul(r.mutable_data(), r.size(), + a.data(), a.size(), a_sw, + b.data(), b.size(), b_sw, + workspace.data(), workspace.size()); + + const size_t r_size = std::max(r.sig_words(), c_sw); + bigint_add2(r.mutable_data(), r_size, c.data(), c_sw); + return r; + } + +/* +* Subtract-Multiply Operation +*/ +BigInt sub_mul(const BigInt& a, const BigInt& b, const BigInt& c) + { + if(a.is_negative() || b.is_negative()) + throw Invalid_Argument("sub_mul: First two arguments must be >= 0"); + + BigInt r = a; + r -= b; + r *= c; + return r; + } + +/* +* Multiply-Subtract Operation +*/ +BigInt mul_sub(const BigInt& a, const BigInt& b, const BigInt& c) + { + if(c.is_negative() || c.is_zero()) + throw Invalid_Argument("mul_sub: Third argument must be > 0"); + + BigInt r = a; + r *= b; + r -= c; + return r; + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/nistp_redc.cpp b/comm/third_party/botan/src/lib/math/numbertheory/nistp_redc.cpp new file mode 100644 index 0000000000..7f5ff18b95 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/nistp_redc.cpp @@ -0,0 +1,583 @@ +/* +* NIST prime reductions +* (C) 2014,2015,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/curve_nistp.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/mp_asmi.h> +#include <botan/internal/ct_utils.h> + +namespace Botan { + +const BigInt& prime_p521() + { + static const BigInt p521("0x1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + + return p521; + } + +void redc_p521(BigInt& x, secure_vector<word>& ws) + { + const size_t p_full_words = 521 / BOTAN_MP_WORD_BITS; + const size_t p_top_bits = 521 % BOTAN_MP_WORD_BITS; + const size_t p_words = p_full_words + 1; + +#if (BOTAN_MP_WORD_BITS == 64) + static const word p521_words[p_words] = { + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0x1FF }; +#else + static const word p521_words[p_words] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x1FF }; +#endif + + if(ws.size() < p_words + 1) + ws.resize(p_words + 1); + + clear_mem(ws.data(), ws.size()); + bigint_shr2(ws.data(), x.data(), std::min(x.size(), 2*p_words), p_full_words, p_top_bits); + + x.mask_bits(521); + x.grow_to(p_words); + + // Word-level carry will be zero + word carry = bigint_add3_nc(x.mutable_data(), x.data(), p_words, ws.data(), p_words); + BOTAN_ASSERT_EQUAL(carry, 0, "Final carry in P-521 reduction"); + + const word top_word = x.word_at(p_full_words); + + /* + * Check if we need to reduce modulo P + * There are two possible cases: + * - The result overflowed past 521 bits, in which case bit 522 will be set + * - The result is exactly 2**521 - 1 + */ + const auto bit_522_set = CT::Mask<word>::expand(top_word >> p_top_bits); + + word and_512 = MP_WORD_MAX; + for(size_t i = 0; i != p_full_words; ++i) + and_512 &= x.word_at(i); + const auto all_512_low_bits_set = CT::Mask<word>::is_equal(and_512, MP_WORD_MAX); + const auto has_p521_top_word = CT::Mask<word>::is_equal(top_word, 0x1FF); + const auto is_p521 = all_512_low_bits_set & has_p521_top_word; + + const auto needs_reduction = is_p521 | bit_522_set; + + bigint_cnd_sub(needs_reduction.value(), x.mutable_data(), p521_words, p_words); + } + +namespace { + +/** +* Treating this MPI as a sequence of 32-bit words in big-endian +* order, return word i. The array is assumed to be large enough. +*/ +inline uint32_t get_uint32(const word xw[], size_t i) + { +#if (BOTAN_MP_WORD_BITS == 32) + return xw[i]; +#else + return static_cast<uint32_t>(xw[i/2] >> ((i % 2)*32)); +#endif + } + +inline void set_words(word x[], size_t i, uint32_t R0, uint32_t R1) + { +#if (BOTAN_MP_WORD_BITS == 32) + x[i] = R0; + x[i+1] = R1; +#else + x[i/2] = (static_cast<uint64_t>(R1) << 32) | R0; +#endif + } + +} + +const BigInt& prime_p192() + { + static const BigInt p192("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF"); + return p192; + } + +void redc_p192(BigInt& x, secure_vector<word>& ws) + { + BOTAN_UNUSED(ws); + + static const size_t p192_limbs = 192 / BOTAN_MP_WORD_BITS; + + x.grow_to(2*p192_limbs); + word* xw = x.mutable_data(); + + const uint64_t X00 = get_uint32(xw, 0); + const uint64_t X01 = get_uint32(xw, 1); + const uint64_t X02 = get_uint32(xw, 2); + const uint64_t X03 = get_uint32(xw, 3); + const uint64_t X04 = get_uint32(xw, 4); + const uint64_t X05 = get_uint32(xw, 5); + const uint64_t X06 = get_uint32(xw, 6); + const uint64_t X07 = get_uint32(xw, 7); + const uint64_t X08 = get_uint32(xw, 8); + const uint64_t X09 = get_uint32(xw, 9); + const uint64_t X10 = get_uint32(xw, 10); + const uint64_t X11 = get_uint32(xw, 11); + + const uint64_t S0 = X00 + X06 + X10; + const uint64_t S1 = X01 + X07 + X11; + const uint64_t S2 = X02 + X06 + X08 + X10; + const uint64_t S3 = X03 + X07 + X09 + X11; + const uint64_t S4 = X04 + X08 + X10; + const uint64_t S5 = X05 + X09 + X11; + + uint64_t S = 0; + uint32_t R0 = 0, R1 = 0; + + S += S0; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S1; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 0, R0, R1); + + S += S2; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S3; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 2, R0, R1); + + S += S4; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S5; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 4, R0, R1); + + // No underflow possible + + /* + This is a table of (i*P-192) % 2**192 for i in 1...3 + */ + static const word p192_mults[3][p192_limbs] = { +#if (BOTAN_MP_WORD_BITS == 64) + {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF}, +#else + {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, +#endif + }; + + CT::unpoison(S); + BOTAN_ASSERT(S <= 2, "Expected overflow"); + + BOTAN_ASSERT_NOMSG(x.size() >= p192_limbs + 1); + x.mask_bits(192); + word borrow = bigint_sub2(x.mutable_data(), p192_limbs + 1, p192_mults[S], p192_limbs); + BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1); + bigint_cnd_add(borrow, x.mutable_data(), p192_limbs + 1, p192_mults[0], p192_limbs); + } + +const BigInt& prime_p224() + { + static const BigInt p224("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001"); + return p224; + } + +void redc_p224(BigInt& x, secure_vector<word>& ws) + { + static const size_t p224_limbs = (BOTAN_MP_WORD_BITS == 32) ? 7 : 4; + + BOTAN_UNUSED(ws); + + x.grow_to(2*p224_limbs); + word* xw = x.mutable_data(); + + const int64_t X00 = get_uint32(xw, 0); + const int64_t X01 = get_uint32(xw, 1); + const int64_t X02 = get_uint32(xw, 2); + const int64_t X03 = get_uint32(xw, 3); + const int64_t X04 = get_uint32(xw, 4); + const int64_t X05 = get_uint32(xw, 5); + const int64_t X06 = get_uint32(xw, 6); + const int64_t X07 = get_uint32(xw, 7); + const int64_t X08 = get_uint32(xw, 8); + const int64_t X09 = get_uint32(xw, 9); + const int64_t X10 = get_uint32(xw, 10); + const int64_t X11 = get_uint32(xw, 11); + const int64_t X12 = get_uint32(xw, 12); + const int64_t X13 = get_uint32(xw, 13); + + // One full copy of P224 is added, so the result is always positive + + const int64_t S0 = 0x00000001 + X00 - X07 - X11; + const int64_t S1 = 0x00000000 + X01 - X08 - X12; + const int64_t S2 = 0x00000000 + X02 - X09 - X13; + const int64_t S3 = 0xFFFFFFFF + X03 + X07 + X11 - X10; + const int64_t S4 = 0xFFFFFFFF + X04 + X08 + X12 - X11; + const int64_t S5 = 0xFFFFFFFF + X05 + X09 + X13 - X12; + const int64_t S6 = 0xFFFFFFFF + X06 + X10 - X13; + + int64_t S = 0; + uint32_t R0 = 0, R1 = 0; + + S += S0; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S1; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 0, R0, R1); + + S += S2; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S3; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 2, R0, R1); + + S += S4; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S5; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 4, R0, R1); + + S += S6; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 6, R0, 0); + + static const word p224_mults[3][p224_limbs] = { +#if (BOTAN_MP_WORD_BITS == 64) + {0x0000000000000001, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF}, + {0x0000000000000002, 0xFFFFFFFE00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF}, + {0x0000000000000003, 0xFFFFFFFD00000000, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF}, +#else + {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0x00000003, 0x00000000, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} +#endif + + }; + + CT::unpoison(S); + BOTAN_ASSERT(S >= 0 && S <= 2, "Expected overflow"); + + BOTAN_ASSERT_NOMSG(x.size() >= p224_limbs + 1); + x.mask_bits(224); + word borrow = bigint_sub2(x.mutable_data(), p224_limbs + 1, p224_mults[S], p224_limbs); + BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1); + bigint_cnd_add(borrow, x.mutable_data(), p224_limbs + 1, p224_mults[0], p224_limbs); + } + +const BigInt& prime_p256() + { + static const BigInt p256("0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF"); + return p256; + } + +void redc_p256(BigInt& x, secure_vector<word>& ws) + { + static const size_t p256_limbs = (BOTAN_MP_WORD_BITS == 32) ? 8 : 4; + + BOTAN_UNUSED(ws); + + x.grow_to(2*p256_limbs); + word* xw = x.mutable_data(); + + const int64_t X00 = get_uint32(xw, 0); + const int64_t X01 = get_uint32(xw, 1); + const int64_t X02 = get_uint32(xw, 2); + const int64_t X03 = get_uint32(xw, 3); + const int64_t X04 = get_uint32(xw, 4); + const int64_t X05 = get_uint32(xw, 5); + const int64_t X06 = get_uint32(xw, 6); + const int64_t X07 = get_uint32(xw, 7); + const int64_t X08 = get_uint32(xw, 8); + const int64_t X09 = get_uint32(xw, 9); + const int64_t X10 = get_uint32(xw, 10); + const int64_t X11 = get_uint32(xw, 11); + const int64_t X12 = get_uint32(xw, 12); + const int64_t X13 = get_uint32(xw, 13); + const int64_t X14 = get_uint32(xw, 14); + const int64_t X15 = get_uint32(xw, 15); + + // Adds 6 * P-256 to prevent underflow + const int64_t S0 = 0xFFFFFFFA + X00 + X08 + X09 - (X11 + X12 + X13) - X14; + const int64_t S1 = 0xFFFFFFFF + X01 + X09 + X10 - X12 - (X13 + X14 + X15); + const int64_t S2 = 0xFFFFFFFF + X02 + X10 + X11 - (X13 + X14 + X15); + const int64_t S3 = 0x00000005 + X03 + (X11 + X12)*2 + X13 - X15 - X08 - X09; + const int64_t S4 = 0x00000000 + X04 + (X12 + X13)*2 + X14 - X09 - X10; + const int64_t S5 = 0x00000000 + X05 + (X13 + X14)*2 + X15 - X10 - X11; + const int64_t S6 = 0x00000006 + X06 + X13 + X14*3 + X15*2 - X08 - X09; + const int64_t S7 = 0xFFFFFFFA + X07 + X15*3 + X08 - X10 - (X11 + X12 + X13); + + int64_t S = 0; + + uint32_t R0 = 0, R1 = 0; + + S += S0; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S1; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 0, R0, R1); + + S += S2; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S3; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 2, R0, R1); + + S += S4; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S5; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 4, R0, R1); + + S += S6; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S7; + R1 = static_cast<uint32_t>(S); + S >>= 32; + set_words(xw, 6, R0, R1); + + S += 5; // the top digits of 6*P-256 + + /* + This is a table of (i*P-256) % 2**256 for i in 1...10 + */ + static const word p256_mults[11][p256_limbs] = { +#if (BOTAN_MP_WORD_BITS == 64) + {0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001}, + {0xFFFFFFFFFFFFFFFE, 0x00000001FFFFFFFF, 0x0000000000000000, 0xFFFFFFFE00000002}, + {0xFFFFFFFFFFFFFFFD, 0x00000002FFFFFFFF, 0x0000000000000000, 0xFFFFFFFD00000003}, + {0xFFFFFFFFFFFFFFFC, 0x00000003FFFFFFFF, 0x0000000000000000, 0xFFFFFFFC00000004}, + {0xFFFFFFFFFFFFFFFB, 0x00000004FFFFFFFF, 0x0000000000000000, 0xFFFFFFFB00000005}, + {0xFFFFFFFFFFFFFFFA, 0x00000005FFFFFFFF, 0x0000000000000000, 0xFFFFFFFA00000006}, + {0xFFFFFFFFFFFFFFF9, 0x00000006FFFFFFFF, 0x0000000000000000, 0xFFFFFFF900000007}, + {0xFFFFFFFFFFFFFFF8, 0x00000007FFFFFFFF, 0x0000000000000000, 0xFFFFFFF800000008}, + {0xFFFFFFFFFFFFFFF7, 0x00000008FFFFFFFF, 0x0000000000000000, 0xFFFFFFF700000009}, + {0xFFFFFFFFFFFFFFF6, 0x00000009FFFFFFFF, 0x0000000000000000, 0xFFFFFFF60000000A}, + {0xFFFFFFFFFFFFFFF5, 0x0000000AFFFFFFFF, 0x0000000000000000, 0xFFFFFFF50000000B}, +#else + {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF}, + {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001, 0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE}, + {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002, 0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD}, + {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003, 0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC}, + {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004, 0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB}, + {0xFFFFFFFA, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000005, 0x00000000, 0x00000000, 0x00000006, 0xFFFFFFFA}, + {0xFFFFFFF9, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000006, 0x00000000, 0x00000000, 0x00000007, 0xFFFFFFF9}, + {0xFFFFFFF8, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000007, 0x00000000, 0x00000000, 0x00000008, 0xFFFFFFF8}, + {0xFFFFFFF7, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000008, 0x00000000, 0x00000000, 0x00000009, 0xFFFFFFF7}, + {0xFFFFFFF6, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000009, 0x00000000, 0x00000000, 0x0000000A, 0xFFFFFFF6}, + {0xFFFFFFF5, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000A, 0x00000000, 0x00000000, 0x0000000B, 0xFFFFFFF5}, +#endif + }; + + CT::unpoison(S); + BOTAN_ASSERT(S >= 0 && S <= 10, "Expected overflow"); + + BOTAN_ASSERT_NOMSG(x.size() >= p256_limbs + 1); + x.mask_bits(256); + word borrow = bigint_sub2(x.mutable_data(), p256_limbs + 1, p256_mults[S], p256_limbs); + BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1); + bigint_cnd_add(borrow, x.mutable_data(), p256_limbs + 1, p256_mults[0], p256_limbs); + } + +const BigInt& prime_p384() + { + static const BigInt p384("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF"); + return p384; + } + +void redc_p384(BigInt& x, secure_vector<word>& ws) + { + BOTAN_UNUSED(ws); + + static const size_t p384_limbs = (BOTAN_MP_WORD_BITS == 32) ? 12 : 6; + + x.grow_to(2*p384_limbs); + word* xw = x.mutable_data(); + + const int64_t X00 = get_uint32(xw, 0); + const int64_t X01 = get_uint32(xw, 1); + const int64_t X02 = get_uint32(xw, 2); + const int64_t X03 = get_uint32(xw, 3); + const int64_t X04 = get_uint32(xw, 4); + const int64_t X05 = get_uint32(xw, 5); + const int64_t X06 = get_uint32(xw, 6); + const int64_t X07 = get_uint32(xw, 7); + const int64_t X08 = get_uint32(xw, 8); + const int64_t X09 = get_uint32(xw, 9); + const int64_t X10 = get_uint32(xw, 10); + const int64_t X11 = get_uint32(xw, 11); + const int64_t X12 = get_uint32(xw, 12); + const int64_t X13 = get_uint32(xw, 13); + const int64_t X14 = get_uint32(xw, 14); + const int64_t X15 = get_uint32(xw, 15); + const int64_t X16 = get_uint32(xw, 16); + const int64_t X17 = get_uint32(xw, 17); + const int64_t X18 = get_uint32(xw, 18); + const int64_t X19 = get_uint32(xw, 19); + const int64_t X20 = get_uint32(xw, 20); + const int64_t X21 = get_uint32(xw, 21); + const int64_t X22 = get_uint32(xw, 22); + const int64_t X23 = get_uint32(xw, 23); + + // One copy of P-384 is added to prevent underflow + const int64_t S0 = 0xFFFFFFFF + X00 + X12 + X20 + X21 - X23; + const int64_t S1 = 0x00000000 + X01 + X13 + X22 + X23 - X12 - X20; + const int64_t S2 = 0x00000000 + X02 + X14 + X23 - X13 - X21; + const int64_t S3 = 0xFFFFFFFF + X03 + X12 + X15 + X20 + X21 - X14 - X22 - X23; + const int64_t S4 = 0xFFFFFFFE + X04 + X12 + X13 + X16 + X20 + X21*2 + X22 - X15 - X23*2; + const int64_t S5 = 0xFFFFFFFF + X05 + X13 + X14 + X17 + X21 + X22*2 + X23 - X16; + const int64_t S6 = 0xFFFFFFFF + X06 + X14 + X15 + X18 + X22 + X23*2 - X17; + const int64_t S7 = 0xFFFFFFFF + X07 + X15 + X16 + X19 + X23 - X18; + const int64_t S8 = 0xFFFFFFFF + X08 + X16 + X17 + X20 - X19; + const int64_t S9 = 0xFFFFFFFF + X09 + X17 + X18 + X21 - X20; + const int64_t SA = 0xFFFFFFFF + X10 + X18 + X19 + X22 - X21; + const int64_t SB = 0xFFFFFFFF + X11 + X19 + X20 + X23 - X22; + + int64_t S = 0; + + uint32_t R0 = 0, R1 = 0; + + S += S0; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S1; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 0, R0, R1); + + S += S2; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S3; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 2, R0, R1); + + S += S4; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S5; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 4, R0, R1); + + S += S6; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S7; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 6, R0, R1); + + S += S8; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += S9; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 8, R0, R1); + + S += SA; + R0 = static_cast<uint32_t>(S); + S >>= 32; + + S += SB; + R1 = static_cast<uint32_t>(S); + S >>= 32; + + set_words(xw, 10, R0, R1); + + /* + This is a table of (i*P-384) % 2**384 for i in 1...4 + */ + static const word p384_mults[5][p384_limbs] = { +#if (BOTAN_MP_WORD_BITS == 64) + {0x00000000FFFFFFFF, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0x00000001FFFFFFFE, 0xFFFFFFFE00000000, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0x00000002FFFFFFFD, 0xFFFFFFFD00000000, 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0x00000003FFFFFFFC, 0xFFFFFFFC00000000, 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0x00000004FFFFFFFB, 0xFFFFFFFB00000000, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + +#else + {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, + {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, +#endif + }; + + CT::unpoison(S); + BOTAN_ASSERT(S >= 0 && S <= 4, "Expected overflow"); + + BOTAN_ASSERT_NOMSG(x.size() >= p384_limbs + 1); + x.mask_bits(384); + word borrow = bigint_sub2(x.mutable_data(), p384_limbs + 1, p384_mults[S], p384_limbs); + BOTAN_DEBUG_ASSERT(borrow == 0 || borrow == 1); + bigint_cnd_add(borrow, x.mutable_data(), p384_limbs + 1, p384_mults[0], p384_limbs); + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/numthry.cpp b/comm/third_party/botan/src/lib/math/numbertheory/numthry.cpp new file mode 100644 index 0000000000..51afa94c64 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/numthry.cpp @@ -0,0 +1,268 @@ +/* +* Number Theory Functions +* (C) 1999-2011,2016,2018,2019 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> +#include <botan/reducer.h> +#include <botan/monty.h> +#include <botan/divide.h> +#include <botan/rng.h> +#include <botan/internal/ct_utils.h> +#include <botan/internal/mp_core.h> +#include <botan/internal/monty_exp.h> +#include <botan/internal/primality.h> +#include <algorithm> + +namespace Botan { + +namespace { + +void sub_abs(BigInt& z, const BigInt& x, const BigInt& y) + { + const size_t x_sw = x.sig_words(); + const size_t y_sw = y.sig_words(); + z.resize(std::max(x_sw, y_sw)); + + bigint_sub_abs(z.mutable_data(), + x.data(), x_sw, + y.data(), y_sw); + } + +} + +/* +* Return the number of 0 bits at the end of n +*/ +size_t low_zero_bits(const BigInt& n) + { + size_t low_zero = 0; + + auto seen_nonempty_word = CT::Mask<word>::cleared(); + + for(size_t i = 0; i != n.size(); ++i) + { + const word x = n.word_at(i); + + // ctz(0) will return sizeof(word) + const size_t tz_x = ctz(x); + + // if x > 0 we want to count tz_x in total but not any + // further words, so set the mask after the addition + low_zero += seen_nonempty_word.if_not_set_return(tz_x); + + seen_nonempty_word |= CT::Mask<word>::expand(x); + } + + // if we saw no words with x > 0 then n == 0 and the value we have + // computed is meaningless. Instead return 0 in that case. + return seen_nonempty_word.if_set_return(low_zero); + } + +namespace { + +size_t safegcd_loop_bound(size_t f_bits, size_t g_bits) + { + const size_t d = std::max(f_bits, g_bits); + + if(d < 46) + return (49*d + 80) / 17; + else + return (49*d + 57) / 17; + } + +} + +/* +* Calculate the GCD +*/ +BigInt gcd(const BigInt& a, const BigInt& b) + { + if(a.is_zero()) + return abs(b); + if(b.is_zero()) + return abs(a); + if(a == 1 || b == 1) + return 1; + + // See https://gcd.cr.yp.to/safegcd-20190413.pdf fig 1.2 + + BigInt f = a; + BigInt g = b; + f.const_time_poison(); + g.const_time_poison(); + + f.set_sign(BigInt::Positive); + g.set_sign(BigInt::Positive); + + const size_t common2s = std::min(low_zero_bits(f), low_zero_bits(g)); + CT::unpoison(common2s); + + f >>= common2s; + g >>= common2s; + + f.ct_cond_swap(f.is_even(), g); + + int32_t delta = 1; + + const size_t loop_cnt = safegcd_loop_bound(f.bits(), g.bits()); + + BigInt newg, t; + for(size_t i = 0; i != loop_cnt; ++i) + { + sub_abs(newg, f, g); + + const bool need_swap = (g.is_odd() && delta > 0); + + // if(need_swap) { delta *= -1 } else { delta *= 1 } + delta *= CT::Mask<uint8_t>::expand(need_swap).if_not_set_return(2) - 1; + f.ct_cond_swap(need_swap, g); + g.ct_cond_swap(need_swap, newg); + + delta += 1; + + g.ct_cond_add(g.is_odd(), f); + g >>= 1; + } + + f <<= common2s; + + f.const_time_unpoison(); + g.const_time_unpoison(); + + BOTAN_ASSERT_NOMSG(g.is_zero()); + + return f; + } + +/* +* Calculate the LCM +*/ +BigInt lcm(const BigInt& a, const BigInt& b) + { + return ct_divide(a * b, gcd(a, b)); + } + +/* +* Modular Exponentiation +*/ +BigInt power_mod(const BigInt& base, const BigInt& exp, const BigInt& mod) + { + if(mod.is_negative() || mod == 1) + { + return 0; + } + + if(base.is_zero() || mod.is_zero()) + { + if(exp.is_zero()) + return 1; + return 0; + } + + Modular_Reducer reduce_mod(mod); + + const size_t exp_bits = exp.bits(); + + if(mod.is_odd()) + { + const size_t powm_window = 4; + + auto monty_mod = std::make_shared<Montgomery_Params>(mod, reduce_mod); + auto powm_base_mod = monty_precompute(monty_mod, reduce_mod.reduce(base), powm_window); + return monty_execute(*powm_base_mod, exp, exp_bits); + } + + /* + Support for even modulus is just a convenience and not considered + cryptographically important, so this implementation is slow ... + */ + BigInt accum = 1; + BigInt g = reduce_mod.reduce(base); + BigInt t; + + for(size_t i = 0; i != exp_bits; ++i) + { + t = reduce_mod.multiply(g, accum); + g = reduce_mod.square(g); + accum.ct_cond_assign(exp.get_bit(i), t); + } + return accum; + } + + +BigInt is_perfect_square(const BigInt& C) + { + if(C < 1) + throw Invalid_Argument("is_perfect_square requires C >= 1"); + if(C == 1) + return 1; + + const size_t n = C.bits(); + const size_t m = (n + 1) / 2; + const BigInt B = C + BigInt::power_of_2(m); + + BigInt X = BigInt::power_of_2(m) - 1; + BigInt X2 = (X*X); + + for(;;) + { + X = (X2 + C) / (2*X); + X2 = (X*X); + + if(X2 < B) + break; + } + + if(X2 == C) + return X; + else + return 0; + } + +/* +* Test for primality using Miller-Rabin +*/ +bool is_prime(const BigInt& n, + RandomNumberGenerator& rng, + size_t prob, + bool is_random) + { + if(n == 2) + return true; + if(n <= 1 || n.is_even()) + return false; + + const size_t n_bits = n.bits(); + + // Fast path testing for small numbers (<= 65521) + if(n_bits <= 16) + { + const uint16_t num = static_cast<uint16_t>(n.word_at(0)); + + return std::binary_search(PRIMES, PRIMES + PRIME_TABLE_SIZE, num); + } + + Modular_Reducer mod_n(n); + + if(rng.is_seeded()) + { + const size_t t = miller_rabin_test_iterations(n_bits, prob, is_random); + + if(is_miller_rabin_probable_prime(n, mod_n, rng, t) == false) + return false; + + if(is_random) + return true; + else + return is_lucas_probable_prime(n, mod_n); + } + else + { + return is_bailie_psw_probable_prime(n, mod_n); + } + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/numthry.h b/comm/third_party/botan/src/lib/math/numbertheory/numthry.h new file mode 100644 index 0000000000..be9cd985ea --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/numthry.h @@ -0,0 +1,296 @@ +/* +* Number Theory Functions +* (C) 1999-2007,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_NUMBER_THEORY_H_ +#define BOTAN_NUMBER_THEORY_H_ + +#include <botan/bigint.h> + +namespace Botan { + +class RandomNumberGenerator; + +/** +* Fused multiply-add +* @param a an integer +* @param b an integer +* @param c an integer +* @return (a*b)+c +*/ +BigInt BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Just use (a*b)+c") + mul_add(const BigInt& a, + const BigInt& b, + const BigInt& c); + +/** +* Fused subtract-multiply +* @param a an integer +* @param b an integer +* @param c an integer +* @return (a-b)*c +*/ +BigInt BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Just use (a-b)*c") + sub_mul(const BigInt& a, + const BigInt& b, + const BigInt& c); + +/** +* Fused multiply-subtract +* @param a an integer +* @param b an integer +* @param c an integer +* @return (a*b)-c +*/ +BigInt BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Just use (a*b)-c") + mul_sub(const BigInt& a, + const BigInt& b, + const BigInt& c); + +/** +* Return the absolute value +* @param n an integer +* @return absolute value of n +*/ +inline BigInt abs(const BigInt& n) { return n.abs(); } + +/** +* Compute the greatest common divisor +* @param x a positive integer +* @param y a positive integer +* @return gcd(x,y) +*/ +BigInt BOTAN_PUBLIC_API(2,0) gcd(const BigInt& x, const BigInt& y); + +/** +* Least common multiple +* @param x a positive integer +* @param y a positive integer +* @return z, smallest integer such that z % x == 0 and z % y == 0 +*/ +BigInt BOTAN_PUBLIC_API(2,0) lcm(const BigInt& x, const BigInt& y); + +/** +* @param x an integer +* @return (x*x) +*/ +BigInt BOTAN_PUBLIC_API(2,0) square(const BigInt& x); + +/** +* Modular inversion. This algorithm is const time with respect to x, +* as long as x is less than modulus. It also avoids leaking +* information about the modulus, except that it does leak which of 3 +* categories the modulus is in: an odd integer, a power of 2, or some +* other even number, and if the modulus is even, leaks the power of 2 +* which divides the modulus. +* +* @param x a positive integer +* @param modulus a positive integer +* @return y st (x*y) % modulus == 1 or 0 if no such value +*/ +BigInt BOTAN_PUBLIC_API(2,0) inverse_mod(const BigInt& x, + const BigInt& modulus); + +/** +* Deprecated modular inversion function. Use inverse_mod instead. +* @param x a positive integer +* @param modulus a positive integer +* @return y st (x*y) % modulus == 1 or 0 if no such value +*/ +BigInt BOTAN_DEPRECATED_API("Use inverse_mod") inverse_euclid(const BigInt& x, const BigInt& modulus); + +/** +* Deprecated modular inversion function. Use inverse_mod instead. +*/ +BigInt BOTAN_DEPRECATED_API("Use inverse_mod") ct_inverse_mod_odd_modulus(const BigInt& x, const BigInt& modulus); + +/** +* Return a^-1 * 2^k mod b +* Returns k, between n and 2n +* Not const time +*/ +size_t BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Use inverse_mod") + almost_montgomery_inverse(BigInt& result, + const BigInt& a, + const BigInt& b); + +/** +* Call almost_montgomery_inverse and correct the result to a^-1 mod b +*/ +BigInt BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Use inverse_mod") + normalized_montgomery_inverse(const BigInt& a, const BigInt& b); + + +/** +* Compute the Jacobi symbol. If n is prime, this is equivalent +* to the Legendre symbol. +* @see http://mathworld.wolfram.com/JacobiSymbol.html +* +* @param a is a non-negative integer +* @param n is an odd integer > 1 +* @return (n / m) +*/ +int32_t BOTAN_PUBLIC_API(2,0) jacobi(const BigInt& a, const BigInt& n); + +/** +* Modular exponentation +* @param b an integer base +* @param x a positive exponent +* @param m a positive modulus +* @return (b^x) % m +*/ +BigInt BOTAN_PUBLIC_API(2,0) power_mod(const BigInt& b, + const BigInt& x, + const BigInt& m); + +/** +* Compute the square root of x modulo a prime using the +* Tonelli-Shanks algorithm +* +* @param x the input +* @param p the prime +* @return y such that (y*y)%p == x, or -1 if no such integer +*/ +BigInt BOTAN_PUBLIC_API(2,0) ressol(const BigInt& x, const BigInt& p); + +/* +* Compute -input^-1 mod 2^MP_WORD_BITS. Throws an exception if input +* is even. If input is odd, then input and 2^n are relatively prime +* and an inverse exists. +*/ +word BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Use inverse_mod") + monty_inverse(word input); + +/** +* @param x an integer +* @return count of the low zero bits in x, or, equivalently, the +* largest value of n such that 2^n divides x evenly. Returns +* zero if x is equal to zero. +*/ +size_t BOTAN_PUBLIC_API(2,0) low_zero_bits(const BigInt& x); + +/** +* Check for primality +* @param n a positive integer to test for primality +* @param rng a random number generator +* @param prob chance of false positive is bounded by 1/2**prob +* @param is_random true if n was randomly chosen by us +* @return true if all primality tests passed, otherwise false +*/ +bool BOTAN_PUBLIC_API(2,0) is_prime(const BigInt& n, + RandomNumberGenerator& rng, + size_t prob = 64, + bool is_random = false); + +/** +* Test if the positive integer x is a perfect square ie if there +* exists some positive integer y st y*y == x +* See FIPS 186-4 sec C.4 +* @return 0 if the integer is not a perfect square, otherwise +* returns the positive y st y*y == x +*/ +BigInt BOTAN_PUBLIC_API(2,8) is_perfect_square(const BigInt& x); + +inline bool BOTAN_DEPRECATED("Use is_prime") + quick_check_prime(const BigInt& n, RandomNumberGenerator& rng) + { return is_prime(n, rng, 32); } + +inline bool BOTAN_DEPRECATED("Use is_prime") + check_prime(const BigInt& n, RandomNumberGenerator& rng) + { return is_prime(n, rng, 56); } + +inline bool BOTAN_DEPRECATED("Use is_prime") + verify_prime(const BigInt& n, RandomNumberGenerator& rng) + { return is_prime(n, rng, 80); } + +/** +* Randomly generate a prime suitable for discrete logarithm parameters +* @param rng a random number generator +* @param bits how large the resulting prime should be in bits +* @param coprime a positive integer that (prime - 1) should be coprime to +* @param equiv a non-negative number that the result should be + equivalent to modulo equiv_mod +* @param equiv_mod the modulus equiv should be checked against +* @param prob use test so false positive is bounded by 1/2**prob +* @return random prime with the specified criteria +*/ +BigInt BOTAN_PUBLIC_API(2,0) random_prime(RandomNumberGenerator& rng, + size_t bits, + const BigInt& coprime = 0, + size_t equiv = 1, + size_t equiv_mod = 2, + size_t prob = 128); + +/** +* Generate a prime suitable for RSA p/q +* @param keygen_rng a random number generator +* @param prime_test_rng a random number generator +* @param bits how large the resulting prime should be in bits (must be >= 512) +* @param coprime a positive integer that (prime - 1) should be coprime to +* @param prob use test so false positive is bounded by 1/2**prob +* @return random prime with the specified criteria +*/ +BigInt BOTAN_PUBLIC_API(2,7) generate_rsa_prime(RandomNumberGenerator& keygen_rng, + RandomNumberGenerator& prime_test_rng, + size_t bits, + const BigInt& coprime, + size_t prob = 128); + +/** +* Return a 'safe' prime, of the form p=2*q+1 with q prime +* @param rng a random number generator +* @param bits is how long the resulting prime should be +* @return prime randomly chosen from safe primes of length bits +*/ +BigInt BOTAN_PUBLIC_API(2,0) random_safe_prime(RandomNumberGenerator& rng, + size_t bits); + +/** +* Generate DSA parameters using the FIPS 186 kosherizer +* @param rng a random number generator +* @param p_out where the prime p will be stored +* @param q_out where the prime q will be stored +* @param pbits how long p will be in bits +* @param qbits how long q will be in bits +* @return random seed used to generate this parameter set +*/ +std::vector<uint8_t> BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Use DL_Group") +generate_dsa_primes(RandomNumberGenerator& rng, + BigInt& p_out, BigInt& q_out, + size_t pbits, size_t qbits); + +/** +* Generate DSA parameters using the FIPS 186 kosherizer +* @param rng a random number generator +* @param p_out where the prime p will be stored +* @param q_out where the prime q will be stored +* @param pbits how long p will be in bits +* @param qbits how long q will be in bits +* @param seed the seed used to generate the parameters +* @param offset optional offset from seed to start searching at +* @return true if seed generated a valid DSA parameter set, otherwise + false. p_out and q_out are only valid if true was returned. +*/ +bool BOTAN_PUBLIC_API(2,0) BOTAN_DEPRECATED("Use DL_Group") +generate_dsa_primes(RandomNumberGenerator& rng, + BigInt& p_out, BigInt& q_out, + size_t pbits, size_t qbits, + const std::vector<uint8_t>& seed, + size_t offset = 0); + +/** +* The size of the PRIMES[] array +*/ +const size_t PRIME_TABLE_SIZE = 6541; + +/** +* A const array of all odd primes less than 65535 +*/ +extern const uint16_t BOTAN_PUBLIC_API(2,0) PRIMES[]; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/pow_mod.cpp b/comm/third_party/botan/src/lib/math/numbertheory/pow_mod.cpp new file mode 100644 index 0000000000..7b38fad1d8 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/pow_mod.cpp @@ -0,0 +1,328 @@ +/* +* Modular Exponentiation Proxy +* (C) 1999-2007,2012,2018,2019 Jack Lloyd +* 2016 Matthias Gierlings +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/pow_mod.h> +#include <botan/numthry.h> +#include <botan/reducer.h> +#include <botan/monty.h> +#include <botan/internal/monty_exp.h> +#include <botan/internal/rounding.h> +#include <vector> + +namespace Botan { + +class Modular_Exponentiator + { + public: + virtual void set_base(const BigInt&) = 0; + virtual void set_exponent(const BigInt&) = 0; + virtual BigInt execute() const = 0; + virtual Modular_Exponentiator* copy() const = 0; + + Modular_Exponentiator() = default; + Modular_Exponentiator(const Modular_Exponentiator&) = default; + Modular_Exponentiator & operator=(const Modular_Exponentiator&) = default; + virtual ~Modular_Exponentiator() = default; + }; + +namespace { + +/** +* Fixed Window Exponentiator +*/ +class Fixed_Window_Exponentiator final : public Modular_Exponentiator + { + public: + void set_exponent(const BigInt& e) override { m_exp = e; } + void set_base(const BigInt&) override; + BigInt execute() const override; + + Modular_Exponentiator* copy() const override + { return new Fixed_Window_Exponentiator(*this); } + + Fixed_Window_Exponentiator(const BigInt&, Power_Mod::Usage_Hints); + private: + Modular_Reducer m_reducer; + BigInt m_exp; + size_t m_window_bits; + std::vector<BigInt> m_g; + Power_Mod::Usage_Hints m_hints; + }; + +void Fixed_Window_Exponentiator::set_base(const BigInt& base) + { + m_window_bits = Power_Mod::window_bits(m_exp.bits(), base.bits(), m_hints); + + m_g.resize(static_cast<size_t>(1) << m_window_bits); + m_g[0] = 1; + m_g[1] = m_reducer.reduce(base); + + for(size_t i = 2; i != m_g.size(); ++i) + m_g[i] = m_reducer.multiply(m_g[i-1], m_g[1]); + } + +BigInt Fixed_Window_Exponentiator::execute() const + { + const size_t exp_nibbles = (m_exp.bits() + m_window_bits - 1) / m_window_bits; + + BigInt x = 1; + + for(size_t i = exp_nibbles; i > 0; --i) + { + for(size_t j = 0; j != m_window_bits; ++j) + x = m_reducer.square(x); + + const uint32_t nibble = m_exp.get_substring(m_window_bits*(i-1), m_window_bits); + + // not const time: + x = m_reducer.multiply(x, m_g[nibble]); + } + return x; + } + +/* +* Fixed_Window_Exponentiator Constructor +*/ +Fixed_Window_Exponentiator::Fixed_Window_Exponentiator(const BigInt& n, + Power_Mod::Usage_Hints hints) + : m_reducer{Modular_Reducer(n)}, m_exp{}, m_window_bits{}, m_g{}, m_hints{hints} + {} + +class Montgomery_Exponentiator final : public Modular_Exponentiator + { + public: + void set_exponent(const BigInt& e) override { m_e = e; } + void set_base(const BigInt&) override; + BigInt execute() const override; + + Modular_Exponentiator* copy() const override + { return new Montgomery_Exponentiator(*this); } + + Montgomery_Exponentiator(const BigInt&, Power_Mod::Usage_Hints); + private: + BigInt m_p; + Modular_Reducer m_mod_p; + std::shared_ptr<const Montgomery_Params> m_monty_params; + std::shared_ptr<const Montgomery_Exponentation_State> m_monty; + + BigInt m_e; + Power_Mod::Usage_Hints m_hints; + }; + +void Montgomery_Exponentiator::set_base(const BigInt& base) + { + size_t window_bits = Power_Mod::window_bits(m_e.bits(), base.bits(), m_hints); + m_monty = monty_precompute(m_monty_params, m_mod_p.reduce(base), window_bits); + } + +BigInt Montgomery_Exponentiator::execute() const + { + /* + This leaks size of e via loop iterations, not possible to fix without + breaking this API. Round up to avoid leaking fine details. + */ + return monty_execute(*m_monty, m_e, round_up(m_e.bits(), 8)); + } + +Montgomery_Exponentiator::Montgomery_Exponentiator(const BigInt& mod, + Power_Mod::Usage_Hints hints) : + m_p(mod), + m_mod_p(mod), + m_monty_params(std::make_shared<Montgomery_Params>(m_p, m_mod_p)), + m_hints(hints) + { + } + +} + +/* +* Power_Mod Constructor +*/ +Power_Mod::Power_Mod(const BigInt& n, Usage_Hints hints, bool disable_monty) + { + set_modulus(n, hints, disable_monty); + } + +Power_Mod::~Power_Mod() { /* for ~unique_ptr */ } + +/* +* Power_Mod Copy Constructor +*/ +Power_Mod::Power_Mod(const Power_Mod& other) + { + if(other.m_core.get()) + m_core.reset(other.m_core->copy()); + } + +/* +* Power_Mod Assignment Operator +*/ +Power_Mod& Power_Mod::operator=(const Power_Mod& other) + { + if(this != &other) + { + if(other.m_core) + m_core.reset(other.m_core->copy()); + else + m_core.reset(); + } + return (*this); + } + +/* +* Set the modulus +*/ +void Power_Mod::set_modulus(const BigInt& n, Usage_Hints hints, bool disable_monty) const + { + // Allow set_modulus(0) to mean "drop old state" + + m_core.reset(); + + if(n != 0) + { + if(n.is_odd() && disable_monty == false) + m_core.reset(new Montgomery_Exponentiator(n, hints)); + else + m_core.reset(new Fixed_Window_Exponentiator(n, hints)); + } + } + +/* +* Set the base +*/ +void Power_Mod::set_base(const BigInt& b) const + { + if(b.is_negative()) + throw Invalid_Argument("Power_Mod::set_base: arg must be non-negative"); + + if(!m_core) + throw Internal_Error("Power_Mod::set_base: m_core was NULL"); + m_core->set_base(b); + } + +/* +* Set the exponent +*/ +void Power_Mod::set_exponent(const BigInt& e) const + { + if(e.is_negative()) + throw Invalid_Argument("Power_Mod::set_exponent: arg must be > 0"); + + if(!m_core) + throw Internal_Error("Power_Mod::set_exponent: m_core was NULL"); + m_core->set_exponent(e); + } + +/* +* Compute the result +*/ +BigInt Power_Mod::execute() const + { + if(!m_core) + throw Internal_Error("Power_Mod::execute: m_core was NULL"); + return m_core->execute(); + } + +/* +* Try to choose a good window size +*/ +size_t Power_Mod::window_bits(size_t exp_bits, size_t, + Power_Mod::Usage_Hints hints) + { + static const size_t wsize[][2] = { + { 1434, 7 }, + { 539, 6 }, + { 197, 4 }, + { 70, 3 }, + { 17, 2 }, + { 0, 0 } + }; + + size_t window_bits = 1; + + if(exp_bits) + { + for(size_t j = 0; wsize[j][0]; ++j) + { + if(exp_bits >= wsize[j][0]) + { + window_bits += wsize[j][1]; + break; + } + } + } + + if(hints & Power_Mod::BASE_IS_FIXED) + window_bits += 2; + if(hints & Power_Mod::EXP_IS_LARGE) + ++window_bits; + + return window_bits; + } + +namespace { + +/* +* Choose potentially useful hints +*/ +Power_Mod::Usage_Hints choose_base_hints(const BigInt& b, const BigInt& n) + { + if(b == 2) + return Power_Mod::Usage_Hints(Power_Mod::BASE_IS_2 | + Power_Mod::BASE_IS_SMALL); + + const size_t b_bits = b.bits(); + const size_t n_bits = n.bits(); + + if(b_bits < n_bits / 32) + return Power_Mod::BASE_IS_SMALL; + if(b_bits > n_bits / 4) + return Power_Mod::BASE_IS_LARGE; + + return Power_Mod::NO_HINTS; + } + +/* +* Choose potentially useful hints +*/ +Power_Mod::Usage_Hints choose_exp_hints(const BigInt& e, const BigInt& n) + { + const size_t e_bits = e.bits(); + const size_t n_bits = n.bits(); + + if(e_bits < n_bits / 32) + return Power_Mod::BASE_IS_SMALL; + if(e_bits > n_bits / 4) + return Power_Mod::BASE_IS_LARGE; + return Power_Mod::NO_HINTS; + } + +} + +/* +* Fixed_Exponent_Power_Mod Constructor +*/ +Fixed_Exponent_Power_Mod::Fixed_Exponent_Power_Mod(const BigInt& e, + const BigInt& n, + Usage_Hints hints) : + Power_Mod(n, Usage_Hints(hints | EXP_IS_FIXED | choose_exp_hints(e, n))) + { + set_exponent(e); + } + +/* +* Fixed_Base_Power_Mod Constructor +*/ +Fixed_Base_Power_Mod::Fixed_Base_Power_Mod(const BigInt& b, const BigInt& n, + Usage_Hints hints) : + Power_Mod(n, Usage_Hints(hints | BASE_IS_FIXED | choose_base_hints(b, n))) + { + set_base(b); + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/pow_mod.h b/comm/third_party/botan/src/lib/math/numbertheory/pow_mod.h new file mode 100644 index 0000000000..b465013e55 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/pow_mod.h @@ -0,0 +1,122 @@ +/* +* Modular Exponentiator +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_POWER_MOD_H_ +#define BOTAN_POWER_MOD_H_ + +#include <botan/bigint.h> + +BOTAN_FUTURE_INTERNAL_HEADER(pow_mod.h) + +namespace Botan { + +class Modular_Exponentiator; + +/** +* Modular Exponentiator Proxy +*/ +class BOTAN_PUBLIC_API(2,0) Power_Mod + { + public: + + enum Usage_Hints { + NO_HINTS = 0x0000, + + BASE_IS_FIXED = 0x0001, + BASE_IS_SMALL = 0x0002, + BASE_IS_LARGE = 0x0004, + BASE_IS_2 = 0x0008, + + EXP_IS_FIXED = 0x0100, + EXP_IS_SMALL = 0x0200, + EXP_IS_LARGE = 0x0400 + }; + + /* + * Try to choose a good window size + */ + static size_t window_bits(size_t exp_bits, size_t base_bits, + Power_Mod::Usage_Hints hints); + + /** + * @param modulus the modulus + * @param hints Passed to set_modulus if modulus > 0 + * @param disable_montgomery_arith Disables use of Montgomery + * representation. Likely only useful for testing. + */ + void set_modulus(const BigInt& modulus, + Usage_Hints hints = NO_HINTS, + bool disable_montgomery_arith = false) const; + + /** + * Set the base + */ + void set_base(const BigInt& base) const; + + /** + * Set the exponent + */ + void set_exponent(const BigInt& exponent) const; + + /** + * All three of the above functions must have already been called. + * @return result of g^x%p + */ + BigInt execute() const; + + Power_Mod& operator=(const Power_Mod&); + + /** + * @param modulus Optionally call set_modulus + * @param hints Passed to set_modulus if modulus > 0 + * @param disable_montgomery_arith Disables use of Montgomery + * representation. Likely only useful for testing. + */ + Power_Mod(const BigInt& modulus = 0, + Usage_Hints hints = NO_HINTS, + bool disable_montgomery_arith = false); + Power_Mod(const Power_Mod&); + virtual ~Power_Mod(); + private: + mutable std::unique_ptr<Modular_Exponentiator> m_core; + }; + +/** +* Fixed Exponent Modular Exponentiator Proxy +*/ +class BOTAN_PUBLIC_API(2,0) Fixed_Exponent_Power_Mod final : public Power_Mod + { + public: + BigInt operator()(const BigInt& b) const + { set_base(b); return execute(); } + + Fixed_Exponent_Power_Mod() = default; + + Fixed_Exponent_Power_Mod(const BigInt& exponent, + const BigInt& modulus, + Usage_Hints hints = NO_HINTS); + }; + +/** +* Fixed Base Modular Exponentiator Proxy +*/ +class BOTAN_PUBLIC_API(2,0) Fixed_Base_Power_Mod final : public Power_Mod + { + public: + BigInt operator()(const BigInt& e) const + { set_exponent(e); return execute(); } + + Fixed_Base_Power_Mod() = default; + + Fixed_Base_Power_Mod(const BigInt& base, + const BigInt& modulus, + Usage_Hints hints = NO_HINTS); + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/primality.cpp b/comm/third_party/botan/src/lib/math/numbertheory/primality.cpp new file mode 100644 index 0000000000..eb2be42b13 --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/primality.cpp @@ -0,0 +1,203 @@ +/* +* (C) 2016,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/internal/primality.h> +#include <botan/internal/monty_exp.h> +#include <botan/bigint.h> +#include <botan/monty.h> +#include <botan/reducer.h> +#include <botan/rng.h> +#include <algorithm> + +namespace Botan { + +bool is_lucas_probable_prime(const BigInt& C, const Modular_Reducer& mod_C) + { + if(C <= 1) + return false; + else if(C == 2) + return true; + else if(C.is_even()) + return false; + else if(C == 3 || C == 5 || C == 7 || C == 11 || C == 13) + return true; + + BigInt D = 5; + + for(;;) + { + int32_t j = jacobi(D, C); + if(j == 0) + return false; + + if(j == -1) + break; + + // Check 5, -7, 9, -11, 13, -15, 17, ... + if(D.is_negative()) + { + D.flip_sign(); + D += 2; + } + else + { + D += 2; + D.flip_sign(); + } + + if(D == 17 && is_perfect_square(C).is_nonzero()) + return false; + } + + const BigInt K = C + 1; + const size_t K_bits = K.bits() - 1; + + BigInt U = 1; + BigInt V = 1; + + BigInt Ut, Vt, U2, V2; + + for(size_t i = 0; i != K_bits; ++i) + { + const bool k_bit = K.get_bit(K_bits - 1 - i); + + Ut = mod_C.multiply(U, V); + + Vt = mod_C.reduce(mod_C.square(V) + mod_C.multiply(D, mod_C.square(U))); + Vt.ct_cond_add(Vt.is_odd(), C); + Vt >>= 1; + Vt = mod_C.reduce(Vt); + + U = Ut; + V = Vt; + + U2 = mod_C.reduce(Ut + Vt); + U2.ct_cond_add(U2.is_odd(), C); + U2 >>= 1; + + V2 = mod_C.reduce(Vt + Ut*D); + V2.ct_cond_add(V2.is_odd(), C); + V2 >>= 1; + + U.ct_cond_assign(k_bit, U2); + V.ct_cond_assign(k_bit, V2); + } + + return (U == 0); + } + +bool is_bailie_psw_probable_prime(const BigInt& n, const Modular_Reducer& mod_n) + { + auto monty_n = std::make_shared<Montgomery_Params>(n, mod_n); + return passes_miller_rabin_test(n, mod_n, monty_n, 2) && is_lucas_probable_prime(n, mod_n); + } + +bool is_bailie_psw_probable_prime(const BigInt& n) + { + Modular_Reducer mod_n(n); + return is_bailie_psw_probable_prime(n, mod_n); + } + +bool passes_miller_rabin_test(const BigInt& n, + const Modular_Reducer& mod_n, + const std::shared_ptr<Montgomery_Params>& monty_n, + const BigInt& a) + { + BOTAN_ASSERT_NOMSG(n > 1); + + const BigInt n_minus_1 = n - 1; + const size_t s = low_zero_bits(n_minus_1); + const BigInt nm1_s = n_minus_1 >> s; + const size_t n_bits = n.bits(); + + const size_t powm_window = 4; + + auto powm_a_n = monty_precompute(monty_n, a, powm_window); + + BigInt y = monty_execute(*powm_a_n, nm1_s, n_bits); + + if(y == 1 || y == n_minus_1) + return true; + + for(size_t i = 1; i != s; ++i) + { + y = mod_n.square(y); + + if(y == 1) // found a non-trivial square root + return false; + + /* + -1 is the trivial square root of unity, so ``a`` is not a + witness for this number - give up + */ + if(y == n_minus_1) + return true; + } + + return false; + } + +bool is_miller_rabin_probable_prime(const BigInt& n, + const Modular_Reducer& mod_n, + RandomNumberGenerator& rng, + size_t test_iterations) + { + BOTAN_ASSERT_NOMSG(n > 1); + + auto monty_n = std::make_shared<Montgomery_Params>(n, mod_n); + + for(size_t i = 0; i != test_iterations; ++i) + { + const BigInt a = BigInt::random_integer(rng, 2, n); + + if(!passes_miller_rabin_test(n, mod_n, monty_n, a)) + return false; + } + + // Failed to find a counterexample + return true; + } + + +size_t miller_rabin_test_iterations(size_t n_bits, size_t prob, bool random) + { + const size_t base = (prob + 2) / 2; // worst case 4^-t error rate + + /* + * If the candidate prime was maliciously constructed, we can't rely + * on arguments based on p being random. + */ + if(random == false) + return base; + + /* + * For randomly chosen numbers we can use the estimates from + * http://www.math.dartmouth.edu/~carlp/PDF/paper88.pdf + * + * These values are derived from the inequality for p(k,t) given on + * the second page. + */ + if(prob <= 128) + { + if(n_bits >= 1536) + return 4; // < 2^-133 + if(n_bits >= 1024) + return 6; // < 2^-133 + if(n_bits >= 512) + return 12; // < 2^-129 + if(n_bits >= 256) + return 29; // < 2^-128 + } + + /* + If the user desires a smaller error probability than we have + precomputed error estimates for, just fall back to using the worst + case error rate. + */ + return base; + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/primality.h b/comm/third_party/botan/src/lib/math/numbertheory/primality.h new file mode 100644 index 0000000000..db7a76a74d --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/primality.h @@ -0,0 +1,100 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_PRIMALITY_TEST_H_ +#define BOTAN_PRIMALITY_TEST_H_ + +#include <botan/types.h> +#include <memory> + +namespace Botan { + +class BigInt; +class Modular_Reducer; +class Montgomery_Params; +class RandomNumberGenerator; + +/** +* Perform Lucas primality test +* @see FIPS 186-4 C.3.3 +* +* @warning it is possible to construct composite integers which pass +* this test alone. +* +* @param n the positive integer to test +* @param mod_n a pre-created Modular_Reducer for n +* @return true if n seems probably prime, false if n is composite +*/ +bool BOTAN_TEST_API is_lucas_probable_prime(const BigInt& n, const Modular_Reducer& mod_n); + +/** +* Perform Bailie-PSW primality test +* +* This is a combination of Miller-Rabin with base 2 and a Lucas test. No known +* composite integer passes both tests, though it is conjectured that infinitely +* many composite counterexamples exist. +* +* @param n the positive integer to test +* @param mod_n a pre-created Modular_Reducer for n +* @return true if n seems probably prime, false if n is composite +*/ +bool BOTAN_TEST_API is_bailie_psw_probable_prime(const BigInt& n, const Modular_Reducer& mod_n); + +/** +* Perform Bailie-PSW primality test +* +* This is a combination of Miller-Rabin with base 2 and a Lucas test. No known +* composite integer passes both tests, though it is conjectured that infinitely +* many composite counterexamples exist. +* +* @param n the positive integer to test +* @return true if n seems probably prime, false if n is composite +*/ +bool is_bailie_psw_probable_prime(const BigInt& n); + +/** +* Return required number of Miller-Rabin tests in order to +* reach the specified probability of error. +* +* @param n_bits the bit-length of the integer being tested +* @param prob chance of false positive is bounded by 1/2**prob +* @param random is set if (and only if) the integer was randomly generated by us +* and thus cannot have been maliciously constructed. +*/ +size_t miller_rabin_test_iterations(size_t n_bits, size_t prob, bool random); + +/** +* Perform a single Miller-Rabin test with specified base +* +* @param n the positive integer to test +* @param mod_n a pre-created Modular_Reducer for n +* @param monty_n Montgomery parameters for n +* @param a the base to check +* @return result of primality test +*/ +bool passes_miller_rabin_test(const BigInt& n, + const Modular_Reducer& mod_n, + const std::shared_ptr<Montgomery_Params>& monty_n, + const BigInt& a); + +/** +* Perform t iterations of a Miller-Rabin primality test with random bases +* +* @param n the positive integer to test +* @param mod_n a pre-created Modular_Reducer for n +* @param rng a random number generator +* @param t number of tests to perform +* +* @return result of primality test +*/ +bool BOTAN_TEST_API is_miller_rabin_probable_prime(const BigInt& n, + const Modular_Reducer& mod_n, + RandomNumberGenerator& rng, + size_t t); + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/primes.cpp b/comm/third_party/botan/src/lib/math/numbertheory/primes.cpp new file mode 100644 index 0000000000..4a3eb46f2c --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/primes.cpp @@ -0,0 +1,609 @@ +/* +* Small Primes Table +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> + +namespace Botan { + +const uint16_t PRIMES[PRIME_TABLE_SIZE+1] = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, + 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, + 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, + 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, + 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, + 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, + 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, + 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, + 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, + 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, + 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, + 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, + 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, + 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, + 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, + 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, + 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, + 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291, + 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, 1381, + 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, + 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, 1523, 1531, 1543, + 1549, 1553, 1559, 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, + 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, 1697, + 1699, 1709, 1721, 1723, 1733, 1741, 1747, 1753, 1759, 1777, 1783, + 1787, 1789, 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, + 1877, 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, + 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, + 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, + 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, 2221, + 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297, + 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, 2381, + 2383, 2389, 2393, 2399, 2411, 2417, 2423, 2437, 2441, 2447, 2459, + 2467, 2473, 2477, 2503, 2521, 2531, 2539, 2543, 2549, 2551, 2557, + 2579, 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, + 2671, 2677, 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, + 2729, 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, + 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, + 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, + 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 3083, + 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181, 3187, 3191, + 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, 3299, + 3301, 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, 3359, 3361, + 3371, 3373, 3389, 3391, 3407, 3413, 3433, 3449, 3457, 3461, 3463, + 3467, 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, + 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, + 3631, 3637, 3643, 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, + 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, + 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, + 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, 4001, + 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, 4073, 4079, + 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, 4153, 4157, 4159, + 4177, 4201, 4211, 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, + 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, 4339, 4349, 4357, + 4363, 4373, 4391, 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, + 4463, 4481, 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, + 4561, 4567, 4583, 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, + 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, + 4751, 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, + 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937, 4943, + 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, + 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, 5099, 5101, 5107, + 5113, 5119, 5147, 5153, 5167, 5171, 5179, 5189, 5197, 5209, 5227, + 5231, 5233, 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, 5323, + 5333, 5347, 5351, 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, + 5431, 5437, 5441, 5443, 5449, 5471, 5477, 5479, 5483, 5501, 5503, + 5507, 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, + 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, + 5693, 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, + 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857, 5861, + 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, 5953, 5981, + 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, 6079, + 6089, 6091, 6101, 6113, 6121, 6131, 6133, 6143, 6151, 6163, 6173, + 6197, 6199, 6203, 6211, 6217, 6221, 6229, 6247, 6257, 6263, 6269, + 6271, 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, 6337, 6343, + 6353, 6359, 6361, 6367, 6373, 6379, 6389, 6397, 6421, 6427, 6449, + 6451, 6469, 6473, 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, + 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, + 6673, 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761, + 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, 6841, + 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, 6947, 6949, + 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997, 7001, 7013, 7019, + 7027, 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, 7127, 7129, + 7151, 7159, 7177, 7187, 7193, 7207, 7211, 7213, 7219, 7229, 7237, + 7243, 7247, 7253, 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, + 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, 7459, 7477, 7481, + 7487, 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, + 7559, 7561, 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, + 7643, 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723, + 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, 7841, + 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, 7927, 7933, + 7937, 7949, 7951, 7963, 7993, 8009, 8011, 8017, 8039, 8053, 8059, + 8069, 8081, 8087, 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, + 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, 8233, 8237, 8243, + 8263, 8269, 8273, 8287, 8291, 8293, 8297, 8311, 8317, 8329, 8353, + 8363, 8369, 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, 8447, + 8461, 8467, 8501, 8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, + 8581, 8597, 8599, 8609, 8623, 8627, 8629, 8641, 8647, 8663, 8669, + 8677, 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 8737, 8741, + 8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831, 8837, + 8839, 8849, 8861, 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, + 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, 9013, 9029, 9041, + 9043, 9049, 9059, 9067, 9091, 9103, 9109, 9127, 9133, 9137, 9151, + 9157, 9161, 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, 9239, + 9241, 9257, 9277, 9281, 9283, 9293, 9311, 9319, 9323, 9337, 9341, + 9343, 9349, 9371, 9377, 9391, 9397, 9403, 9413, 9419, 9421, 9431, + 9433, 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, + 9521, 9533, 9539, 9547, 9551, 9587, 9601, 9613, 9619, 9623, 9629, + 9631, 9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, + 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811, 9817, + 9829, 9833, 9839, 9851, 9857, 9859, 9871, 9883, 9887, 9901, 9907, + 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, 10039, +10061, 10067, 10069, 10079, 10091, 10093, 10099, 10103, 10111, 10133, 10139, +10141, 10151, 10159, 10163, 10169, 10177, 10181, 10193, 10211, 10223, 10243, +10247, 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, 10313, 10321, +10331, 10333, 10337, 10343, 10357, 10369, 10391, 10399, 10427, 10429, 10433, +10453, 10457, 10459, 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, +10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639, 10651, +10657, 10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739, +10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, 10861, +10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949, 10957, 10973, +10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059, 11069, 11071, 11083, +11087, 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, 11171, 11173, +11177, 11197, 11213, 11239, 11243, 11251, 11257, 11261, 11273, 11279, 11287, +11299, 11311, 11317, 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, +11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, 11489, 11491, 11497, +11503, 11519, 11527, 11549, 11551, 11579, 11587, 11593, 11597, 11617, 11621, +11633, 11657, 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 11743, +11777, 11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833, +11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933, 11939, +11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, 12037, 12041, +12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109, 12113, 12119, 12143, +12149, 12157, 12161, 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, +12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, 12329, 12343, 12347, +12373, 12377, 12379, 12391, 12401, 12409, 12413, 12421, 12433, 12437, 12451, +12457, 12473, 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, 12539, +12541, 12547, 12553, 12569, 12577, 12583, 12589, 12601, 12611, 12613, 12619, +12637, 12641, 12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, +12739, 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829, +12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923, 12941, +12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, +13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, 13121, 13127, 13147, +13151, 13159, 13163, 13171, 13177, 13183, 13187, 13217, 13219, 13229, 13241, +13249, 13259, 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, 13339, +13367, 13381, 13397, 13399, 13411, 13417, 13421, 13441, 13451, 13457, 13463, +13469, 13477, 13487, 13499, 13513, 13523, 13537, 13553, 13567, 13577, 13591, +13597, 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, 13687, 13691, +13693, 13697, 13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, +13781, 13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, +13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967, 13997, +13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081, 14083, 14087, +14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, 14243, +14249, 14251, 14281, 14293, 14303, 14321, 14323, 14327, 14341, 14347, 14369, +14387, 14389, 14401, 14407, 14411, 14419, 14423, 14431, 14437, 14447, 14449, +14461, 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, 14551, 14557, +14561, 14563, 14591, 14593, 14621, 14627, 14629, 14633, 14639, 14653, 14657, +14669, 14683, 14699, 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, +14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, +14851, 14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939, 14947, +14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, 15077, +15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149, 15161, 15173, +15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259, 15263, 15269, 15271, +15277, 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, 15349, 15359, +15361, 15373, 15377, 15383, 15391, 15401, 15413, 15427, 15439, 15443, 15451, +15461, 15467, 15473, 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, +15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, 15647, 15649, 15661, +15667, 15671, 15679, 15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, +15767, 15773, 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, +15881, 15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959, 15971, +15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069, 16073, +16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, 16187, 16189, +16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267, 16273, 16301, 16319, +16333, 16339, 16349, 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, +16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, 16519, 16529, 16547, +16553, 16561, 16567, 16573, 16603, 16607, 16619, 16631, 16633, 16649, 16651, +16657, 16661, 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, 16759, +16763, 16787, 16811, 16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, +16901, 16903, 16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981, 16987, +16993, 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077, 17093, +17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191, 17203, +17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, +17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, 17393, 17401, 17417, +17419, 17431, 17443, 17449, 17467, 17471, 17477, 17483, 17489, 17491, 17497, +17509, 17519, 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, 17609, +17623, 17627, 17657, 17659, 17669, 17681, 17683, 17707, 17713, 17729, 17737, +17747, 17749, 17761, 17783, 17789, 17791, 17807, 17827, 17837, 17839, 17851, +17863, 17881, 17891, 17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, +17959, 17971, 17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047, 18049, +18059, 18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143, +18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233, 18251, +18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313, 18329, 18341, +18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427, 18433, 18439, 18443, +18451, 18457, 18461, 18481, 18493, 18503, 18517, 18521, 18523, 18539, 18541, +18553, 18583, 18587, 18593, 18617, 18637, 18661, 18671, 18679, 18691, 18701, +18713, 18719, 18731, 18743, 18749, 18757, 18773, 18787, 18793, 18797, 18803, +18839, 18859, 18869, 18899, 18911, 18913, 18917, 18919, 18947, 18959, 18973, +18979, 19001, 19009, 19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, +19087, 19121, 19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211, 19213, +19219, 19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319, +19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423, 19427, +19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477, 19483, 19489, +19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577, 19583, 19597, +19603, 19609, 19661, 19681, 19687, 19697, 19699, 19709, 19717, 19727, 19739, +19751, 19753, 19759, 19763, 19777, 19793, 19801, 19813, 19819, 19841, 19843, +19853, 19861, 19867, 19889, 19891, 19913, 19919, 19927, 19937, 19949, 19961, +19963, 19973, 19979, 19991, 19993, 19997, 20011, 20021, 20023, 20029, 20047, +20051, 20063, 20071, 20089, 20101, 20107, 20113, 20117, 20123, 20129, 20143, +20147, 20149, 20161, 20173, 20177, 20183, 20201, 20219, 20231, 20233, 20249, +20261, 20269, 20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, +20359, 20369, 20389, 20393, 20399, 20407, 20411, 20431, 20441, 20443, 20477, +20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551, 20563, 20593, +20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707, 20717, 20719, +20731, 20743, 20747, 20749, 20753, 20759, 20771, 20773, 20789, 20807, 20809, +20849, 20857, 20873, 20879, 20887, 20897, 20899, 20903, 20921, 20929, 20939, +20947, 20959, 20963, 20981, 20983, 21001, 21011, 21013, 21017, 21019, 21023, +21031, 21059, 21061, 21067, 21089, 21101, 21107, 21121, 21139, 21143, 21149, +21157, 21163, 21169, 21179, 21187, 21191, 21193, 21211, 21221, 21227, 21247, +21269, 21277, 21283, 21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, +21383, 21391, 21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, +21493, 21499, 21503, 21517, 21521, 21523, 21529, 21557, 21559, 21563, 21569, +21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, 21649, 21661, +21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, 21757, 21767, 21773, +21787, 21799, 21803, 21817, 21821, 21839, 21841, 21851, 21859, 21863, 21871, +21881, 21893, 21911, 21929, 21937, 21943, 21961, 21977, 21991, 21997, 22003, +22013, 22027, 22031, 22037, 22039, 22051, 22063, 22067, 22073, 22079, 22091, +22093, 22109, 22111, 22123, 22129, 22133, 22147, 22153, 22157, 22159, 22171, +22189, 22193, 22229, 22247, 22259, 22271, 22273, 22277, 22279, 22283, 22291, +22303, 22307, 22343, 22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, +22441, 22447, 22453, 22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, +22549, 22567, 22571, 22573, 22613, 22619, 22621, 22637, 22639, 22643, 22651, +22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, 22739, 22741, +22751, 22769, 22777, 22783, 22787, 22807, 22811, 22817, 22853, 22859, 22861, +22871, 22877, 22901, 22907, 22921, 22937, 22943, 22961, 22963, 22973, 22993, +23003, 23011, 23017, 23021, 23027, 23029, 23039, 23041, 23053, 23057, 23059, +23063, 23071, 23081, 23087, 23099, 23117, 23131, 23143, 23159, 23167, 23173, +23189, 23197, 23201, 23203, 23209, 23227, 23251, 23269, 23279, 23291, 23293, +23297, 23311, 23321, 23327, 23333, 23339, 23357, 23369, 23371, 23399, 23417, +23431, 23447, 23459, 23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, +23561, 23563, 23567, 23581, 23593, 23599, 23603, 23609, 23623, 23627, 23629, +23633, 23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743, 23747, +23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, 23831, 23833, +23857, 23869, 23873, 23879, 23887, 23893, 23899, 23909, 23911, 23917, 23929, +23957, 23971, 23977, 23981, 23993, 24001, 24007, 24019, 24023, 24029, 24043, +24049, 24061, 24071, 24077, 24083, 24091, 24097, 24103, 24107, 24109, 24113, +24121, 24133, 24137, 24151, 24169, 24179, 24181, 24197, 24203, 24223, 24229, +24239, 24247, 24251, 24281, 24317, 24329, 24337, 24359, 24371, 24373, 24379, +24391, 24407, 24413, 24419, 24421, 24439, 24443, 24469, 24473, 24481, 24499, +24509, 24517, 24527, 24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, +24659, 24671, 24677, 24683, 24691, 24697, 24709, 24733, 24749, 24763, 24767, +24781, 24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, 24889, +24907, 24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, 24979, 24989, +25013, 25031, 25033, 25037, 25057, 25073, 25087, 25097, 25111, 25117, 25121, +25127, 25147, 25153, 25163, 25169, 25171, 25183, 25189, 25219, 25229, 25237, +25243, 25247, 25253, 25261, 25301, 25303, 25307, 25309, 25321, 25339, 25343, +25349, 25357, 25367, 25373, 25391, 25409, 25411, 25423, 25439, 25447, 25453, +25457, 25463, 25469, 25471, 25523, 25537, 25541, 25561, 25577, 25579, 25583, +25589, 25601, 25603, 25609, 25621, 25633, 25639, 25643, 25657, 25667, 25673, +25679, 25693, 25703, 25717, 25733, 25741, 25747, 25759, 25763, 25771, 25793, +25799, 25801, 25819, 25841, 25847, 25849, 25867, 25873, 25889, 25903, 25913, +25919, 25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, 26003, +26017, 26021, 26029, 26041, 26053, 26083, 26099, 26107, 26111, 26113, 26119, +26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203, 26209, 26227, 26237, +26249, 26251, 26261, 26263, 26267, 26293, 26297, 26309, 26317, 26321, 26339, +26347, 26357, 26371, 26387, 26393, 26399, 26407, 26417, 26423, 26431, 26437, +26449, 26459, 26479, 26489, 26497, 26501, 26513, 26539, 26557, 26561, 26573, +26591, 26597, 26627, 26633, 26641, 26647, 26669, 26681, 26683, 26687, 26693, +26699, 26701, 26711, 26713, 26717, 26723, 26729, 26731, 26737, 26759, 26777, +26783, 26801, 26813, 26821, 26833, 26839, 26849, 26861, 26863, 26879, 26881, +26891, 26893, 26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987, +26993, 27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, 27091, +27103, 27107, 27109, 27127, 27143, 27179, 27191, 27197, 27211, 27239, 27241, +27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, 27337, 27361, 27367, +27397, 27407, 27409, 27427, 27431, 27437, 27449, 27457, 27479, 27481, 27487, +27509, 27527, 27529, 27539, 27541, 27551, 27581, 27583, 27611, 27617, 27631, +27647, 27653, 27673, 27689, 27691, 27697, 27701, 27733, 27737, 27739, 27743, +27749, 27751, 27763, 27767, 27773, 27779, 27791, 27793, 27799, 27803, 27809, +27817, 27823, 27827, 27847, 27851, 27883, 27893, 27901, 27917, 27919, 27941, +27943, 27947, 27953, 27961, 27967, 27983, 27997, 28001, 28019, 28027, 28031, +28051, 28057, 28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, +28163, 28181, 28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, 28289, +28297, 28307, 28309, 28319, 28349, 28351, 28387, 28393, 28403, 28409, 28411, +28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, 28513, 28517, 28537, +28541, 28547, 28549, 28559, 28571, 28573, 28579, 28591, 28597, 28603, 28607, +28619, 28621, 28627, 28631, 28643, 28649, 28657, 28661, 28663, 28669, 28687, +28697, 28703, 28711, 28723, 28729, 28751, 28753, 28759, 28771, 28789, 28793, +28807, 28813, 28817, 28837, 28843, 28859, 28867, 28871, 28879, 28901, 28909, +28921, 28927, 28933, 28949, 28961, 28979, 29009, 29017, 29021, 29023, 29027, +29033, 29059, 29063, 29077, 29101, 29123, 29129, 29131, 29137, 29147, 29153, +29167, 29173, 29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, +29269, 29287, 29297, 29303, 29311, 29327, 29333, 29339, 29347, 29363, 29383, +29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443, 29453, 29473, +29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, 29581, 29587, 29599, +29611, 29629, 29633, 29641, 29663, 29669, 29671, 29683, 29717, 29723, 29741, +29753, 29759, 29761, 29789, 29803, 29819, 29833, 29837, 29851, 29863, 29867, +29873, 29879, 29881, 29917, 29921, 29927, 29947, 29959, 29983, 29989, 30011, +30013, 30029, 30047, 30059, 30071, 30089, 30091, 30097, 30103, 30109, 30113, +30119, 30133, 30137, 30139, 30161, 30169, 30181, 30187, 30197, 30203, 30211, +30223, 30241, 30253, 30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, +30341, 30347, 30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, +30491, 30493, 30497, 30509, 30517, 30529, 30539, 30553, 30557, 30559, 30577, +30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, 30697, 30703, +30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, 30809, 30817, 30829, +30839, 30841, 30851, 30853, 30859, 30869, 30871, 30881, 30893, 30911, 30931, +30937, 30941, 30949, 30971, 30977, 30983, 31013, 31019, 31033, 31039, 31051, +31063, 31069, 31079, 31081, 31091, 31121, 31123, 31139, 31147, 31151, 31153, +31159, 31177, 31181, 31183, 31189, 31193, 31219, 31223, 31231, 31237, 31247, +31249, 31253, 31259, 31267, 31271, 31277, 31307, 31319, 31321, 31327, 31333, +31337, 31357, 31379, 31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, +31511, 31513, 31517, 31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, +31607, 31627, 31643, 31649, 31657, 31663, 31667, 31687, 31699, 31721, 31723, +31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, 31847, 31849, +31859, 31873, 31883, 31891, 31907, 31957, 31963, 31973, 31981, 31991, 32003, +32009, 32027, 32029, 32051, 32057, 32059, 32063, 32069, 32077, 32083, 32089, +32099, 32117, 32119, 32141, 32143, 32159, 32173, 32183, 32189, 32191, 32203, +32213, 32233, 32237, 32251, 32257, 32261, 32297, 32299, 32303, 32309, 32321, +32323, 32327, 32341, 32353, 32359, 32363, 32369, 32371, 32377, 32381, 32401, +32411, 32413, 32423, 32429, 32441, 32443, 32467, 32479, 32491, 32497, 32503, +32507, 32531, 32533, 32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, +32609, 32611, 32621, 32633, 32647, 32653, 32687, 32693, 32707, 32713, 32717, +32719, 32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831, 32833, +32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, 32941, 32957, +32969, 32971, 32983, 32987, 32993, 32999, 33013, 33023, 33029, 33037, 33049, +33053, 33071, 33073, 33083, 33091, 33107, 33113, 33119, 33149, 33151, 33161, +33179, 33181, 33191, 33199, 33203, 33211, 33223, 33247, 33287, 33289, 33301, +33311, 33317, 33329, 33331, 33343, 33347, 33349, 33353, 33359, 33377, 33391, +33403, 33409, 33413, 33427, 33457, 33461, 33469, 33479, 33487, 33493, 33503, +33521, 33529, 33533, 33547, 33563, 33569, 33577, 33581, 33587, 33589, 33599, +33601, 33613, 33617, 33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, +33713, 33721, 33739, 33749, 33751, 33757, 33767, 33769, 33773, 33791, 33797, +33809, 33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, 33911, +33923, 33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, 34033, 34039, +34057, 34061, 34123, 34127, 34129, 34141, 34147, 34157, 34159, 34171, 34183, +34211, 34213, 34217, 34231, 34253, 34259, 34261, 34267, 34273, 34283, 34297, +34301, 34303, 34313, 34319, 34327, 34337, 34351, 34361, 34367, 34369, 34381, +34403, 34421, 34429, 34439, 34457, 34469, 34471, 34483, 34487, 34499, 34501, +34511, 34513, 34519, 34537, 34543, 34549, 34583, 34589, 34591, 34603, 34607, +34613, 34631, 34649, 34651, 34667, 34673, 34679, 34687, 34693, 34703, 34721, +34729, 34739, 34747, 34757, 34759, 34763, 34781, 34807, 34819, 34841, 34843, +34847, 34849, 34871, 34877, 34883, 34897, 34913, 34919, 34939, 34949, 34961, +34963, 34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, 35089, +35099, 35107, 35111, 35117, 35129, 35141, 35149, 35153, 35159, 35171, 35201, +35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291, 35311, 35317, 35323, +35327, 35339, 35353, 35363, 35381, 35393, 35401, 35407, 35419, 35423, 35437, +35447, 35449, 35461, 35491, 35507, 35509, 35521, 35527, 35531, 35533, 35537, +35543, 35569, 35573, 35591, 35593, 35597, 35603, 35617, 35671, 35677, 35729, +35731, 35747, 35753, 35759, 35771, 35797, 35801, 35803, 35809, 35831, 35837, +35839, 35851, 35863, 35869, 35879, 35897, 35899, 35911, 35923, 35933, 35951, +35963, 35969, 35977, 35983, 35993, 35999, 36007, 36011, 36013, 36017, 36037, +36061, 36067, 36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161, +36187, 36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, 36293, +36299, 36307, 36313, 36319, 36341, 36343, 36353, 36373, 36383, 36389, 36433, +36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, 36523, 36527, 36529, +36541, 36551, 36559, 36563, 36571, 36583, 36587, 36599, 36607, 36629, 36637, +36643, 36653, 36671, 36677, 36683, 36691, 36697, 36709, 36713, 36721, 36739, +36749, 36761, 36767, 36779, 36781, 36787, 36791, 36793, 36809, 36821, 36833, +36847, 36857, 36871, 36877, 36887, 36899, 36901, 36913, 36919, 36923, 36929, +36931, 36943, 36947, 36973, 36979, 36997, 37003, 37013, 37019, 37021, 37039, +37049, 37057, 37061, 37087, 37097, 37117, 37123, 37139, 37159, 37171, 37181, +37189, 37199, 37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, +37313, 37321, 37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, 37409, +37423, 37441, 37447, 37463, 37483, 37489, 37493, 37501, 37507, 37511, 37517, +37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579, 37589, 37591, +37607, 37619, 37633, 37643, 37649, 37657, 37663, 37691, 37693, 37699, 37717, +37747, 37781, 37783, 37799, 37811, 37813, 37831, 37847, 37853, 37861, 37871, +37879, 37889, 37897, 37907, 37951, 37957, 37963, 37967, 37987, 37991, 37993, +37997, 38011, 38039, 38047, 38053, 38069, 38083, 38113, 38119, 38149, 38153, +38167, 38177, 38183, 38189, 38197, 38201, 38219, 38231, 38237, 38239, 38261, +38273, 38281, 38287, 38299, 38303, 38317, 38321, 38327, 38329, 38333, 38351, +38371, 38377, 38393, 38431, 38447, 38449, 38453, 38459, 38461, 38501, 38543, +38557, 38561, 38567, 38569, 38593, 38603, 38609, 38611, 38629, 38639, 38651, +38653, 38669, 38671, 38677, 38693, 38699, 38707, 38711, 38713, 38723, 38729, +38737, 38747, 38749, 38767, 38783, 38791, 38803, 38821, 38833, 38839, 38851, +38861, 38867, 38873, 38891, 38903, 38917, 38921, 38923, 38933, 38953, 38959, +38971, 38977, 38993, 39019, 39023, 39041, 39043, 39047, 39079, 39089, 39097, +39103, 39107, 39113, 39119, 39133, 39139, 39157, 39161, 39163, 39181, 39191, +39199, 39209, 39217, 39227, 39229, 39233, 39239, 39241, 39251, 39293, 39301, +39313, 39317, 39323, 39341, 39343, 39359, 39367, 39371, 39373, 39383, 39397, +39409, 39419, 39439, 39443, 39451, 39461, 39499, 39503, 39509, 39511, 39521, +39541, 39551, 39563, 39569, 39581, 39607, 39619, 39623, 39631, 39659, 39667, +39671, 39679, 39703, 39709, 39719, 39727, 39733, 39749, 39761, 39769, 39779, +39791, 39799, 39821, 39827, 39829, 39839, 39841, 39847, 39857, 39863, 39869, +39877, 39883, 39887, 39901, 39929, 39937, 39953, 39971, 39979, 39983, 39989, +40009, 40013, 40031, 40037, 40039, 40063, 40087, 40093, 40099, 40111, 40123, +40127, 40129, 40151, 40153, 40163, 40169, 40177, 40189, 40193, 40213, 40231, +40237, 40241, 40253, 40277, 40283, 40289, 40343, 40351, 40357, 40361, 40387, +40423, 40427, 40429, 40433, 40459, 40471, 40483, 40487, 40493, 40499, 40507, +40519, 40529, 40531, 40543, 40559, 40577, 40583, 40591, 40597, 40609, 40627, +40637, 40639, 40693, 40697, 40699, 40709, 40739, 40751, 40759, 40763, 40771, +40787, 40801, 40813, 40819, 40823, 40829, 40841, 40847, 40849, 40853, 40867, +40879, 40883, 40897, 40903, 40927, 40933, 40939, 40949, 40961, 40973, 40993, +41011, 41017, 41023, 41039, 41047, 41051, 41057, 41077, 41081, 41113, 41117, +41131, 41141, 41143, 41149, 41161, 41177, 41179, 41183, 41189, 41201, 41203, +41213, 41221, 41227, 41231, 41233, 41243, 41257, 41263, 41269, 41281, 41299, +41333, 41341, 41351, 41357, 41381, 41387, 41389, 41399, 41411, 41413, 41443, +41453, 41467, 41479, 41491, 41507, 41513, 41519, 41521, 41539, 41543, 41549, +41579, 41593, 41597, 41603, 41609, 41611, 41617, 41621, 41627, 41641, 41647, +41651, 41659, 41669, 41681, 41687, 41719, 41729, 41737, 41759, 41761, 41771, +41777, 41801, 41809, 41813, 41843, 41849, 41851, 41863, 41879, 41887, 41893, +41897, 41903, 41911, 41927, 41941, 41947, 41953, 41957, 41959, 41969, 41981, +41983, 41999, 42013, 42017, 42019, 42023, 42043, 42061, 42071, 42073, 42083, +42089, 42101, 42131, 42139, 42157, 42169, 42179, 42181, 42187, 42193, 42197, +42209, 42221, 42223, 42227, 42239, 42257, 42281, 42283, 42293, 42299, 42307, +42323, 42331, 42337, 42349, 42359, 42373, 42379, 42391, 42397, 42403, 42407, +42409, 42433, 42437, 42443, 42451, 42457, 42461, 42463, 42467, 42473, 42487, +42491, 42499, 42509, 42533, 42557, 42569, 42571, 42577, 42589, 42611, 42641, +42643, 42649, 42667, 42677, 42683, 42689, 42697, 42701, 42703, 42709, 42719, +42727, 42737, 42743, 42751, 42767, 42773, 42787, 42793, 42797, 42821, 42829, +42839, 42841, 42853, 42859, 42863, 42899, 42901, 42923, 42929, 42937, 42943, +42953, 42961, 42967, 42979, 42989, 43003, 43013, 43019, 43037, 43049, 43051, +43063, 43067, 43093, 43103, 43117, 43133, 43151, 43159, 43177, 43189, 43201, +43207, 43223, 43237, 43261, 43271, 43283, 43291, 43313, 43319, 43321, 43331, +43391, 43397, 43399, 43403, 43411, 43427, 43441, 43451, 43457, 43481, 43487, +43499, 43517, 43541, 43543, 43573, 43577, 43579, 43591, 43597, 43607, 43609, +43613, 43627, 43633, 43649, 43651, 43661, 43669, 43691, 43711, 43717, 43721, +43753, 43759, 43777, 43781, 43783, 43787, 43789, 43793, 43801, 43853, 43867, +43889, 43891, 43913, 43933, 43943, 43951, 43961, 43963, 43969, 43973, 43987, +43991, 43997, 44017, 44021, 44027, 44029, 44041, 44053, 44059, 44071, 44087, +44089, 44101, 44111, 44119, 44123, 44129, 44131, 44159, 44171, 44179, 44189, +44201, 44203, 44207, 44221, 44249, 44257, 44263, 44267, 44269, 44273, 44279, +44281, 44293, 44351, 44357, 44371, 44381, 44383, 44389, 44417, 44449, 44453, +44483, 44491, 44497, 44501, 44507, 44519, 44531, 44533, 44537, 44543, 44549, +44563, 44579, 44587, 44617, 44621, 44623, 44633, 44641, 44647, 44651, 44657, +44683, 44687, 44699, 44701, 44711, 44729, 44741, 44753, 44771, 44773, 44777, +44789, 44797, 44809, 44819, 44839, 44843, 44851, 44867, 44879, 44887, 44893, +44909, 44917, 44927, 44939, 44953, 44959, 44963, 44971, 44983, 44987, 45007, +45013, 45053, 45061, 45077, 45083, 45119, 45121, 45127, 45131, 45137, 45139, +45161, 45179, 45181, 45191, 45197, 45233, 45247, 45259, 45263, 45281, 45289, +45293, 45307, 45317, 45319, 45329, 45337, 45341, 45343, 45361, 45377, 45389, +45403, 45413, 45427, 45433, 45439, 45481, 45491, 45497, 45503, 45523, 45533, +45541, 45553, 45557, 45569, 45587, 45589, 45599, 45613, 45631, 45641, 45659, +45667, 45673, 45677, 45691, 45697, 45707, 45737, 45751, 45757, 45763, 45767, +45779, 45817, 45821, 45823, 45827, 45833, 45841, 45853, 45863, 45869, 45887, +45893, 45943, 45949, 45953, 45959, 45971, 45979, 45989, 46021, 46027, 46049, +46051, 46061, 46073, 46091, 46093, 46099, 46103, 46133, 46141, 46147, 46153, +46171, 46181, 46183, 46187, 46199, 46219, 46229, 46237, 46261, 46271, 46273, +46279, 46301, 46307, 46309, 46327, 46337, 46349, 46351, 46381, 46399, 46411, +46439, 46441, 46447, 46451, 46457, 46471, 46477, 46489, 46499, 46507, 46511, +46523, 46549, 46559, 46567, 46573, 46589, 46591, 46601, 46619, 46633, 46639, +46643, 46649, 46663, 46679, 46681, 46687, 46691, 46703, 46723, 46727, 46747, +46751, 46757, 46769, 46771, 46807, 46811, 46817, 46819, 46829, 46831, 46853, +46861, 46867, 46877, 46889, 46901, 46919, 46933, 46957, 46993, 46997, 47017, +47041, 47051, 47057, 47059, 47087, 47093, 47111, 47119, 47123, 47129, 47137, +47143, 47147, 47149, 47161, 47189, 47207, 47221, 47237, 47251, 47269, 47279, +47287, 47293, 47297, 47303, 47309, 47317, 47339, 47351, 47353, 47363, 47381, +47387, 47389, 47407, 47417, 47419, 47431, 47441, 47459, 47491, 47497, 47501, +47507, 47513, 47521, 47527, 47533, 47543, 47563, 47569, 47581, 47591, 47599, +47609, 47623, 47629, 47639, 47653, 47657, 47659, 47681, 47699, 47701, 47711, +47713, 47717, 47737, 47741, 47743, 47777, 47779, 47791, 47797, 47807, 47809, +47819, 47837, 47843, 47857, 47869, 47881, 47903, 47911, 47917, 47933, 47939, +47947, 47951, 47963, 47969, 47977, 47981, 48017, 48023, 48029, 48049, 48073, +48079, 48091, 48109, 48119, 48121, 48131, 48157, 48163, 48179, 48187, 48193, +48197, 48221, 48239, 48247, 48259, 48271, 48281, 48299, 48311, 48313, 48337, +48341, 48353, 48371, 48383, 48397, 48407, 48409, 48413, 48437, 48449, 48463, +48473, 48479, 48481, 48487, 48491, 48497, 48523, 48527, 48533, 48539, 48541, +48563, 48571, 48589, 48593, 48611, 48619, 48623, 48647, 48649, 48661, 48673, +48677, 48679, 48731, 48733, 48751, 48757, 48761, 48767, 48779, 48781, 48787, +48799, 48809, 48817, 48821, 48823, 48847, 48857, 48859, 48869, 48871, 48883, +48889, 48907, 48947, 48953, 48973, 48989, 48991, 49003, 49009, 49019, 49031, +49033, 49037, 49043, 49057, 49069, 49081, 49103, 49109, 49117, 49121, 49123, +49139, 49157, 49169, 49171, 49177, 49193, 49199, 49201, 49207, 49211, 49223, +49253, 49261, 49277, 49279, 49297, 49307, 49331, 49333, 49339, 49363, 49367, +49369, 49391, 49393, 49409, 49411, 49417, 49429, 49433, 49451, 49459, 49463, +49477, 49481, 49499, 49523, 49529, 49531, 49537, 49547, 49549, 49559, 49597, +49603, 49613, 49627, 49633, 49639, 49663, 49667, 49669, 49681, 49697, 49711, +49727, 49739, 49741, 49747, 49757, 49783, 49787, 49789, 49801, 49807, 49811, +49823, 49831, 49843, 49853, 49871, 49877, 49891, 49919, 49921, 49927, 49937, +49939, 49943, 49957, 49991, 49993, 49999, 50021, 50023, 50033, 50047, 50051, +50053, 50069, 50077, 50087, 50093, 50101, 50111, 50119, 50123, 50129, 50131, +50147, 50153, 50159, 50177, 50207, 50221, 50227, 50231, 50261, 50263, 50273, +50287, 50291, 50311, 50321, 50329, 50333, 50341, 50359, 50363, 50377, 50383, +50387, 50411, 50417, 50423, 50441, 50459, 50461, 50497, 50503, 50513, 50527, +50539, 50543, 50549, 50551, 50581, 50587, 50591, 50593, 50599, 50627, 50647, +50651, 50671, 50683, 50707, 50723, 50741, 50753, 50767, 50773, 50777, 50789, +50821, 50833, 50839, 50849, 50857, 50867, 50873, 50891, 50893, 50909, 50923, +50929, 50951, 50957, 50969, 50971, 50989, 50993, 51001, 51031, 51043, 51047, +51059, 51061, 51071, 51109, 51131, 51133, 51137, 51151, 51157, 51169, 51193, +51197, 51199, 51203, 51217, 51229, 51239, 51241, 51257, 51263, 51283, 51287, +51307, 51329, 51341, 51343, 51347, 51349, 51361, 51383, 51407, 51413, 51419, +51421, 51427, 51431, 51437, 51439, 51449, 51461, 51473, 51479, 51481, 51487, +51503, 51511, 51517, 51521, 51539, 51551, 51563, 51577, 51581, 51593, 51599, +51607, 51613, 51631, 51637, 51647, 51659, 51673, 51679, 51683, 51691, 51713, +51719, 51721, 51749, 51767, 51769, 51787, 51797, 51803, 51817, 51827, 51829, +51839, 51853, 51859, 51869, 51871, 51893, 51899, 51907, 51913, 51929, 51941, +51949, 51971, 51973, 51977, 51991, 52009, 52021, 52027, 52051, 52057, 52067, +52069, 52081, 52103, 52121, 52127, 52147, 52153, 52163, 52177, 52181, 52183, +52189, 52201, 52223, 52237, 52249, 52253, 52259, 52267, 52289, 52291, 52301, +52313, 52321, 52361, 52363, 52369, 52379, 52387, 52391, 52433, 52453, 52457, +52489, 52501, 52511, 52517, 52529, 52541, 52543, 52553, 52561, 52567, 52571, +52579, 52583, 52609, 52627, 52631, 52639, 52667, 52673, 52691, 52697, 52709, +52711, 52721, 52727, 52733, 52747, 52757, 52769, 52783, 52807, 52813, 52817, +52837, 52859, 52861, 52879, 52883, 52889, 52901, 52903, 52919, 52937, 52951, +52957, 52963, 52967, 52973, 52981, 52999, 53003, 53017, 53047, 53051, 53069, +53077, 53087, 53089, 53093, 53101, 53113, 53117, 53129, 53147, 53149, 53161, +53171, 53173, 53189, 53197, 53201, 53231, 53233, 53239, 53267, 53269, 53279, +53281, 53299, 53309, 53323, 53327, 53353, 53359, 53377, 53381, 53401, 53407, +53411, 53419, 53437, 53441, 53453, 53479, 53503, 53507, 53527, 53549, 53551, +53569, 53591, 53593, 53597, 53609, 53611, 53617, 53623, 53629, 53633, 53639, +53653, 53657, 53681, 53693, 53699, 53717, 53719, 53731, 53759, 53773, 53777, +53783, 53791, 53813, 53819, 53831, 53849, 53857, 53861, 53881, 53887, 53891, +53897, 53899, 53917, 53923, 53927, 53939, 53951, 53959, 53987, 53993, 54001, +54011, 54013, 54037, 54049, 54059, 54083, 54091, 54101, 54121, 54133, 54139, +54151, 54163, 54167, 54181, 54193, 54217, 54251, 54269, 54277, 54287, 54293, +54311, 54319, 54323, 54331, 54347, 54361, 54367, 54371, 54377, 54401, 54403, +54409, 54413, 54419, 54421, 54437, 54443, 54449, 54469, 54493, 54497, 54499, +54503, 54517, 54521, 54539, 54541, 54547, 54559, 54563, 54577, 54581, 54583, +54601, 54617, 54623, 54629, 54631, 54647, 54667, 54673, 54679, 54709, 54713, +54721, 54727, 54751, 54767, 54773, 54779, 54787, 54799, 54829, 54833, 54851, +54869, 54877, 54881, 54907, 54917, 54919, 54941, 54949, 54959, 54973, 54979, +54983, 55001, 55009, 55021, 55049, 55051, 55057, 55061, 55073, 55079, 55103, +55109, 55117, 55127, 55147, 55163, 55171, 55201, 55207, 55213, 55217, 55219, +55229, 55243, 55249, 55259, 55291, 55313, 55331, 55333, 55337, 55339, 55343, +55351, 55373, 55381, 55399, 55411, 55439, 55441, 55457, 55469, 55487, 55501, +55511, 55529, 55541, 55547, 55579, 55589, 55603, 55609, 55619, 55621, 55631, +55633, 55639, 55661, 55663, 55667, 55673, 55681, 55691, 55697, 55711, 55717, +55721, 55733, 55763, 55787, 55793, 55799, 55807, 55813, 55817, 55819, 55823, +55829, 55837, 55843, 55849, 55871, 55889, 55897, 55901, 55903, 55921, 55927, +55931, 55933, 55949, 55967, 55987, 55997, 56003, 56009, 56039, 56041, 56053, +56081, 56087, 56093, 56099, 56101, 56113, 56123, 56131, 56149, 56167, 56171, +56179, 56197, 56207, 56209, 56237, 56239, 56249, 56263, 56267, 56269, 56299, +56311, 56333, 56359, 56369, 56377, 56383, 56393, 56401, 56417, 56431, 56437, +56443, 56453, 56467, 56473, 56477, 56479, 56489, 56501, 56503, 56509, 56519, +56527, 56531, 56533, 56543, 56569, 56591, 56597, 56599, 56611, 56629, 56633, +56659, 56663, 56671, 56681, 56687, 56701, 56711, 56713, 56731, 56737, 56747, +56767, 56773, 56779, 56783, 56807, 56809, 56813, 56821, 56827, 56843, 56857, +56873, 56891, 56893, 56897, 56909, 56911, 56921, 56923, 56929, 56941, 56951, +56957, 56963, 56983, 56989, 56993, 56999, 57037, 57041, 57047, 57059, 57073, +57077, 57089, 57097, 57107, 57119, 57131, 57139, 57143, 57149, 57163, 57173, +57179, 57191, 57193, 57203, 57221, 57223, 57241, 57251, 57259, 57269, 57271, +57283, 57287, 57301, 57329, 57331, 57347, 57349, 57367, 57373, 57383, 57389, +57397, 57413, 57427, 57457, 57467, 57487, 57493, 57503, 57527, 57529, 57557, +57559, 57571, 57587, 57593, 57601, 57637, 57641, 57649, 57653, 57667, 57679, +57689, 57697, 57709, 57713, 57719, 57727, 57731, 57737, 57751, 57773, 57781, +57787, 57791, 57793, 57803, 57809, 57829, 57839, 57847, 57853, 57859, 57881, +57899, 57901, 57917, 57923, 57943, 57947, 57973, 57977, 57991, 58013, 58027, +58031, 58043, 58049, 58057, 58061, 58067, 58073, 58099, 58109, 58111, 58129, +58147, 58151, 58153, 58169, 58171, 58189, 58193, 58199, 58207, 58211, 58217, +58229, 58231, 58237, 58243, 58271, 58309, 58313, 58321, 58337, 58363, 58367, +58369, 58379, 58391, 58393, 58403, 58411, 58417, 58427, 58439, 58441, 58451, +58453, 58477, 58481, 58511, 58537, 58543, 58549, 58567, 58573, 58579, 58601, +58603, 58613, 58631, 58657, 58661, 58679, 58687, 58693, 58699, 58711, 58727, +58733, 58741, 58757, 58763, 58771, 58787, 58789, 58831, 58889, 58897, 58901, +58907, 58909, 58913, 58921, 58937, 58943, 58963, 58967, 58979, 58991, 58997, +59009, 59011, 59021, 59023, 59029, 59051, 59053, 59063, 59069, 59077, 59083, +59093, 59107, 59113, 59119, 59123, 59141, 59149, 59159, 59167, 59183, 59197, +59207, 59209, 59219, 59221, 59233, 59239, 59243, 59263, 59273, 59281, 59333, +59341, 59351, 59357, 59359, 59369, 59377, 59387, 59393, 59399, 59407, 59417, +59419, 59441, 59443, 59447, 59453, 59467, 59471, 59473, 59497, 59509, 59513, +59539, 59557, 59561, 59567, 59581, 59611, 59617, 59621, 59627, 59629, 59651, +59659, 59663, 59669, 59671, 59693, 59699, 59707, 59723, 59729, 59743, 59747, +59753, 59771, 59779, 59791, 59797, 59809, 59833, 59863, 59879, 59887, 59921, +59929, 59951, 59957, 59971, 59981, 59999, 60013, 60017, 60029, 60037, 60041, +60077, 60083, 60089, 60091, 60101, 60103, 60107, 60127, 60133, 60139, 60149, +60161, 60167, 60169, 60209, 60217, 60223, 60251, 60257, 60259, 60271, 60289, +60293, 60317, 60331, 60337, 60343, 60353, 60373, 60383, 60397, 60413, 60427, +60443, 60449, 60457, 60493, 60497, 60509, 60521, 60527, 60539, 60589, 60601, +60607, 60611, 60617, 60623, 60631, 60637, 60647, 60649, 60659, 60661, 60679, +60689, 60703, 60719, 60727, 60733, 60737, 60757, 60761, 60763, 60773, 60779, +60793, 60811, 60821, 60859, 60869, 60887, 60889, 60899, 60901, 60913, 60917, +60919, 60923, 60937, 60943, 60953, 60961, 61001, 61007, 61027, 61031, 61043, +61051, 61057, 61091, 61099, 61121, 61129, 61141, 61151, 61153, 61169, 61211, +61223, 61231, 61253, 61261, 61283, 61291, 61297, 61331, 61333, 61339, 61343, +61357, 61363, 61379, 61381, 61403, 61409, 61417, 61441, 61463, 61469, 61471, +61483, 61487, 61493, 61507, 61511, 61519, 61543, 61547, 61553, 61559, 61561, +61583, 61603, 61609, 61613, 61627, 61631, 61637, 61643, 61651, 61657, 61667, +61673, 61681, 61687, 61703, 61717, 61723, 61729, 61751, 61757, 61781, 61813, +61819, 61837, 61843, 61861, 61871, 61879, 61909, 61927, 61933, 61949, 61961, +61967, 61979, 61981, 61987, 61991, 62003, 62011, 62017, 62039, 62047, 62053, +62057, 62071, 62081, 62099, 62119, 62129, 62131, 62137, 62141, 62143, 62171, +62189, 62191, 62201, 62207, 62213, 62219, 62233, 62273, 62297, 62299, 62303, +62311, 62323, 62327, 62347, 62351, 62383, 62401, 62417, 62423, 62459, 62467, +62473, 62477, 62483, 62497, 62501, 62507, 62533, 62539, 62549, 62563, 62581, +62591, 62597, 62603, 62617, 62627, 62633, 62639, 62653, 62659, 62683, 62687, +62701, 62723, 62731, 62743, 62753, 62761, 62773, 62791, 62801, 62819, 62827, +62851, 62861, 62869, 62873, 62897, 62903, 62921, 62927, 62929, 62939, 62969, +62971, 62981, 62983, 62987, 62989, 63029, 63031, 63059, 63067, 63073, 63079, +63097, 63103, 63113, 63127, 63131, 63149, 63179, 63197, 63199, 63211, 63241, +63247, 63277, 63281, 63299, 63311, 63313, 63317, 63331, 63337, 63347, 63353, +63361, 63367, 63377, 63389, 63391, 63397, 63409, 63419, 63421, 63439, 63443, +63463, 63467, 63473, 63487, 63493, 63499, 63521, 63527, 63533, 63541, 63559, +63577, 63587, 63589, 63599, 63601, 63607, 63611, 63617, 63629, 63647, 63649, +63659, 63667, 63671, 63689, 63691, 63697, 63703, 63709, 63719, 63727, 63737, +63743, 63761, 63773, 63781, 63793, 63799, 63803, 63809, 63823, 63839, 63841, +63853, 63857, 63863, 63901, 63907, 63913, 63929, 63949, 63977, 63997, 64007, +64013, 64019, 64033, 64037, 64063, 64067, 64081, 64091, 64109, 64123, 64151, +64153, 64157, 64171, 64187, 64189, 64217, 64223, 64231, 64237, 64271, 64279, +64283, 64301, 64303, 64319, 64327, 64333, 64373, 64381, 64399, 64403, 64433, +64439, 64451, 64453, 64483, 64489, 64499, 64513, 64553, 64567, 64577, 64579, +64591, 64601, 64609, 64613, 64621, 64627, 64633, 64661, 64663, 64667, 64679, +64693, 64709, 64717, 64747, 64763, 64781, 64783, 64793, 64811, 64817, 64849, +64853, 64871, 64877, 64879, 64891, 64901, 64919, 64921, 64927, 64937, 64951, +64969, 64997, 65003, 65011, 65027, 65029, 65033, 65053, 65063, 65071, 65089, +65099, 65101, 65111, 65119, 65123, 65129, 65141, 65147, 65167, 65171, 65173, +65179, 65183, 65203, 65213, 65239, 65257, 65267, 65269, 65287, 65293, 65309, +65323, 65327, 65353, 65357, 65371, 65381, 65393, 65407, 65413, 65419, 65423, +65437, 65447, 65449, 65479, 65497, 65519, 65521, 0 }; + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/reducer.cpp b/comm/third_party/botan/src/lib/math/numbertheory/reducer.cpp new file mode 100644 index 0000000000..deb3874d3e --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/reducer.cpp @@ -0,0 +1,119 @@ +/* +* Modular Reducer +* (C) 1999-2011,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/reducer.h> +#include <botan/internal/ct_utils.h> +#include <botan/internal/mp_core.h> +#include <botan/divide.h> + +namespace Botan { + +/* +* Modular_Reducer Constructor +*/ +Modular_Reducer::Modular_Reducer(const BigInt& mod) + { + if(mod < 0) + throw Invalid_Argument("Modular_Reducer: modulus must be positive"); + + // Left uninitialized if mod == 0 + m_mod_words = 0; + + if(mod > 0) + { + m_modulus = mod; + m_mod_words = m_modulus.sig_words(); + + // Compute mu = floor(2^{2k} / m) + m_mu.set_bit(2 * BOTAN_MP_WORD_BITS * m_mod_words); + m_mu = ct_divide(m_mu, m_modulus); + } + } + +BigInt Modular_Reducer::reduce(const BigInt& x) const + { + BigInt r; + secure_vector<word> ws; + reduce(r, x, ws); + return r; + } + +namespace { + +/* +* Like if(cnd) x.rev_sub(...) but in const time +*/ +void cnd_rev_sub(bool cnd, BigInt& x, const word y[], size_t y_sw, secure_vector<word>& ws) + { + if(x.sign() != BigInt::Positive) + throw Invalid_State("BigInt::sub_rev requires this is positive"); + + const size_t x_sw = x.sig_words(); + + const size_t max_words = std::max(x_sw, y_sw); + ws.resize(std::max(x_sw, y_sw)); + clear_mem(ws.data(), ws.size()); + x.grow_to(max_words); + + const int32_t relative_size = bigint_sub_abs(ws.data(), x.data(), x_sw, y, y_sw); + + x.cond_flip_sign((relative_size > 0) && cnd); + bigint_cnd_swap(cnd, x.mutable_data(), ws.data(), max_words); + } + +} + +void Modular_Reducer::reduce(BigInt& t1, const BigInt& x, secure_vector<word>& ws) const + { + if(&t1 == &x) + throw Invalid_State("Modular_Reducer arguments cannot alias"); + if(m_mod_words == 0) + throw Invalid_State("Modular_Reducer: Never initalized"); + + const size_t x_sw = x.sig_words(); + + if(x_sw > 2*m_mod_words) + { + // too big, fall back to slow boat division + t1 = ct_modulo(x, m_modulus); + return; + } + + t1 = x; + t1.set_sign(BigInt::Positive); + t1 >>= (BOTAN_MP_WORD_BITS * (m_mod_words - 1)); + + t1.mul(m_mu, ws); + t1 >>= (BOTAN_MP_WORD_BITS * (m_mod_words + 1)); + + // TODO add masked mul to avoid computing high bits + t1.mul(m_modulus, ws); + t1.mask_bits(BOTAN_MP_WORD_BITS * (m_mod_words + 1)); + + t1.rev_sub(x.data(), std::min(x_sw, m_mod_words + 1), ws); + + /* + * If t1 < 0 then we must add b^(k+1) where b = 2^w. To avoid a + * side channel perform the addition unconditionally, with ws set + * to either b^(k+1) or else 0. + */ + const word t1_neg = t1.is_negative(); + + if(ws.size() < m_mod_words + 2) + ws.resize(m_mod_words + 2); + clear_mem(ws.data(), ws.size()); + ws[m_mod_words + 1] = t1_neg; + + t1.add(ws.data(), m_mod_words + 2, BigInt::Positive); + + // Per HAC this step requires at most 2 subtractions + t1.ct_reduce_below(m_modulus, ws, 2); + + cnd_rev_sub(t1.is_nonzero() && x.is_negative(), t1, m_modulus.data(), m_modulus.size(), ws); + } + +} diff --git a/comm/third_party/botan/src/lib/math/numbertheory/reducer.h b/comm/third_party/botan/src/lib/math/numbertheory/reducer.h new file mode 100644 index 0000000000..b1c2c87a9f --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/reducer.h @@ -0,0 +1,69 @@ +/* +* Modular Reducer +* (C) 1999-2010 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MODULAR_REDUCER_H_ +#define BOTAN_MODULAR_REDUCER_H_ + +#include <botan/numthry.h> + +namespace Botan { + +/** +* Modular Reducer (using Barrett's technique) +*/ +class BOTAN_PUBLIC_API(2,0) Modular_Reducer + { + public: + const BigInt& get_modulus() const { return m_modulus; } + + BigInt reduce(const BigInt& x) const; + + /** + * Multiply mod p + * @param x the first operand + * @param y the second operand + * @return (x * y) % p + */ + BigInt multiply(const BigInt& x, const BigInt& y) const + { return reduce(x * y); } + + /** + * Square mod p + * @param x the value to square + * @return (x * x) % p + */ + BigInt square(const BigInt& x) const + { return reduce(Botan::square(x)); } + + /** + * Cube mod p + * @param x the value to cube + * @return (x * x * x) % p + */ + BigInt cube(const BigInt& x) const + { return multiply(x, this->square(x)); } + + /** + * Low level reduction function. Mostly for internal use. + * Sometimes useful for performance by reducing temporaries + * Reduce x mod p and place the output in out. ** X and out must not reference each other ** + * ws is a temporary workspace. + */ + void reduce(BigInt& out, const BigInt& x, secure_vector<word>& ws) const; + + bool initialized() const { return (m_mod_words != 0); } + + Modular_Reducer() { m_mod_words = 0; } + explicit Modular_Reducer(const BigInt& mod); + private: + BigInt m_modulus, m_mu; + size_t m_mod_words; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/math/numbertheory/ressol.cpp b/comm/third_party/botan/src/lib/math/numbertheory/ressol.cpp new file mode 100644 index 0000000000..f9e7e3eb1d --- /dev/null +++ b/comm/third_party/botan/src/lib/math/numbertheory/ressol.cpp @@ -0,0 +1,100 @@ +/* +* (C) 2007,2008 Falko Strenzke, FlexSecure GmbH +* (C) 2008 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/numthry.h> +#include <botan/reducer.h> + +namespace Botan { + +/* +* Tonelli-Shanks algorithm +*/ +BigInt ressol(const BigInt& a, const BigInt& p) + { + if(p <= 1 || p.is_even()) + throw Invalid_Argument("ressol: invalid prime"); + + if(a == 0) + return 0; + else if(a < 0) + throw Invalid_Argument("ressol: value to solve for must be positive"); + else if(a >= p) + throw Invalid_Argument("ressol: value to solve for must be less than p"); + + if(p == 2) + return a; + + if(jacobi(a, p) != 1) // not a quadratic residue + return -BigInt(1); + + if(p % 4 == 3) // The easy case + { + return power_mod(a, ((p+1) >> 2), p); + } + + size_t s = low_zero_bits(p - 1); + BigInt q = p >> s; + + q -= 1; + q >>= 1; + + Modular_Reducer mod_p(p); + + BigInt r = power_mod(a, q, p); + BigInt n = mod_p.multiply(a, mod_p.square(r)); + r = mod_p.multiply(r, a); + + if(n == 1) + return r; + + // find random quadratic nonresidue z + word z = 2; + for(;;) + { + if(jacobi(z, p) == -1) // found one + break; + + z += 1; // try next z + + /* + * The expected number of tests to find a non-residue modulo a + * prime is 2. If we have not found one after 256 then almost + * certainly we have been given a non-prime p. + */ + if(z >= 256) + return -BigInt(1); + } + + BigInt c = power_mod(z, (q << 1) + 1, p); + + while(n > 1) + { + q = n; + + size_t i = 0; + while(q != 1) + { + q = mod_p.square(q); + ++i; + + if(i >= s) + { + return -BigInt(1); + } + } + + c = power_mod(c, BigInt::power_of_2(s-i-1), p); + r = mod_p.multiply(r, c); + c = mod_p.square(c); + n = mod_p.multiply(n, c); + s = i; + } + + return r; + } + +} |