diff options
Diffstat (limited to 'comm/third_party/botan/src/lib/block/idea')
5 files changed, 503 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/idea/idea.cpp b/comm/third_party/botan/src/lib/block/idea/idea.cpp new file mode 100644 index 0000000000..f8f5ceb348 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea.cpp @@ -0,0 +1,240 @@ +/* +* IDEA +* (C) 1999-2010,2015 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/idea.h> +#include <botan/loadstor.h> +#include <botan/cpuid.h> +#include <botan/internal/ct_utils.h> + +namespace Botan { + +namespace { + +/* +* Multiplication modulo 65537; P_mask (set from is_zero(P), which holds exactly when x or y is zero) picks between r_2 = 1 - x - y and r_1 = (P_lo - P_hi) + carry +*/ +inline uint16_t mul(uint16_t x, uint16_t y) + { + const uint32_t P = static_cast<uint32_t>(x) * y; + const auto P_mask = CT::Mask<uint16_t>(CT::Mask<uint32_t>::is_zero(P)); + + const uint32_t P_hi = P >> 16; + const uint32_t P_lo = P & 0xFFFF; + + const uint16_t carry = (P_lo < P_hi); + const uint16_t r_1 = static_cast<uint16_t>((P_lo - P_hi) + carry); + const uint16_t r_2 = 1 - x - y; + + return P_mask.select(r_2, r_1); + } + +/* +* Find multiplicative inverses modulo 65537 +* +* 65537 is prime; thus Fermat's little theorem tells us that +* x^65537 == x modulo 65537, which means +* x^(65537-2) == x^-1 modulo 65537 since +* x^(65537-2) * x == 1 mod 65537 +* +* Do the exponentiation with a basic square and multiply: all bits +* of the exponent are 1 so we always multiply +*/ +uint16_t mul_inv(uint16_t x) + { + uint16_t y = x; + + for(size_t i = 0; i != 15; ++i) + { + y = mul(y, y); // square + y = mul(y, x); + } + + return y; + } + +/** +* IDEA is involutional, depending only on the key schedule: encrypt_n passes m_EK and decrypt_n passes m_DK to this same routine +*/ +void idea_op(const uint8_t in[], uint8_t out[], size_t blocks, const uint16_t K[52]) + { + const size_t BLOCK_SIZE = 8; + + CT::poison(in, blocks * 8); + CT::poison(out, blocks * 8); + CT::poison(K, 52); + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) + { + uint16_t X1, X2, X3, X4; + load_be(in + BLOCK_SIZE*i, X1, X2, X3, X4); + + for(size_t j = 0; j != 8; ++j) + { + X1 = mul(X1, K[6*j+0]); + X2 
+= K[6*j+1]; + X3 += K[6*j+2]; + X4 = mul(X4, K[6*j+3]); + + const uint16_t T0 = X3; + X3 = mul(X3 ^ X1, K[6*j+4]); + + const uint16_t T1 = X2; + X2 = mul((X2 ^ X4) + X3, K[6*j+5]); + X3 += X2; + + X1 ^= X2; + X4 ^= X3; + X2 ^= T0; + X3 ^= T1; + } + + X1 = mul(X1, K[48]); + X2 += K[50]; + X3 += K[49]; + X4 = mul(X4, K[51]); + + store_be(out + BLOCK_SIZE*i, X1, X3, X2, X4); + } + + CT::unpoison(in, blocks * 8); + CT::unpoison(out, blocks * 8); + CT::unpoison(K, 52); + } + +} + +size_t IDEA::parallelism() const + { +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + return 8; + } +#endif + + return 1; + } + +std::string IDEA::provider() const + { +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + return "sse2"; + } +#endif + + return "base"; + } + +/* +* IDEA Encryption: when SSE2 support is compiled in and detected, full groups of 8 blocks go to sse2_idea_op_8; whatever remains goes to idea_op, both with m_EK +*/ +void IDEA::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + while(blocks >= 8) + { + sse2_idea_op_8(in, out, m_EK.data()); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + + idea_op(in, out, blocks, m_EK.data()); + } + +/* +* IDEA Decryption: same dispatch as encryption (SSE2 for full groups of 8 blocks, idea_op for the rest), but with m_DK +*/ +void IDEA::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + while(blocks >= 8) + { + sse2_idea_op_8(in, out, m_DK.data()); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + + idea_op(in, out, blocks, m_DK.data()); + } + +/* +* IDEA Key Schedule: reads 16 key bytes and fills the 52-word schedules m_EK and m_DK; the length argument is unnamed and unused +*/ +void IDEA::key_schedule(const uint8_t key[], size_t) + { + m_EK.resize(52); + m_DK.resize(52); + + CT::poison(key, 16); + CT::poison(m_EK.data(), 52); + CT::poison(m_DK.data(), 52); + + secure_vector<uint64_t> K(2); + + K[0] = load_be<uint64_t>(key, 0); + K[1] = load_be<uint64_t>(key, 1); + + for(size_t off = 0; off != 48; off += 8) + { + for(size_t i = 0; i != 8; ++i) 
+ m_EK[off+i] = static_cast<uint16_t>(K[i/4] >> (48-16*(i % 4))); + + const uint64_t Kx = (K[0] >> 39); + const uint64_t Ky = (K[1] >> 39); + + K[0] = (K[0] << 25) | Ky; + K[1] = (K[1] << 25) | Kx; + } + + for(size_t i = 0; i != 4; ++i) + m_EK[48+i] = static_cast<uint16_t>(K[i/4] >> (48-16*(i % 4))); + + m_DK[0] = mul_inv(m_EK[48]); + m_DK[1] = -m_EK[49]; + m_DK[2] = -m_EK[50]; + m_DK[3] = mul_inv(m_EK[51]); + + for(size_t i = 0; i != 8*6; i += 6) + { + m_DK[i+4] = m_EK[46-i]; + m_DK[i+5] = m_EK[47-i]; + m_DK[i+6] = mul_inv(m_EK[42-i]); + m_DK[i+7] = -m_EK[44-i]; + m_DK[i+8] = -m_EK[43-i]; + m_DK[i+9] = mul_inv(m_EK[45-i]); + } + + std::swap(m_DK[49], m_DK[50]); + + CT::unpoison(key, 16); + CT::unpoison(m_EK.data(), 52); + CT::unpoison(m_DK.data(), 52); + } + +void IDEA::clear() + { + zap(m_EK); + zap(m_DK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/idea/idea.h b/comm/third_party/botan/src/lib/block/idea/idea.h new file mode 100644 index 0000000000..e5e51606b9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea.h @@ -0,0 +1,45 @@ +/* +* IDEA +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_IDEA_H_ +#define BOTAN_IDEA_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(idea.h) + +namespace Botan { + +/** +* IDEA block cipher; the base class arguments 8 and 16 are presumably the block size and key length in bytes (the implementation works on 8-byte blocks and reads a 16-byte key) - confirm against Block_Cipher_Fixed_Params +*/ +class BOTAN_PUBLIC_API(2,0) IDEA final : public Block_Cipher_Fixed_Params<8, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + + std::string provider() const override; + std::string name() const override { return "IDEA"; } + BlockCipher* clone() const override { return new IDEA; } + size_t parallelism() const override; + + private: +#if defined(BOTAN_HAS_IDEA_SSE2) + void sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const; +#endif + + void 
key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint16_t> m_EK, m_DK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp b/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp new file mode 100644 index 0000000000..93648cfc7a --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp @@ -0,0 +1,208 @@ +/* +* IDEA in SSE2 +* (C) 2009 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/idea.h> +#include <botan/internal/ct_utils.h> +#include <emmintrin.h> + +namespace Botan { + +namespace { + +BOTAN_FUNC_ISA("sse2") +inline __m128i mul(__m128i X, uint16_t K_16) + { + const __m128i zeros = _mm_set1_epi16(0); + const __m128i ones = _mm_set1_epi16(1); + + const __m128i K = _mm_set1_epi16(K_16); + + const __m128i X_is_zero = _mm_cmpeq_epi16(X, zeros); + const __m128i K_is_zero = _mm_cmpeq_epi16(K, zeros); + + const __m128i mul_lo = _mm_mullo_epi16(X, K); + const __m128i mul_hi = _mm_mulhi_epu16(X, K); + + __m128i T = _mm_sub_epi16(mul_lo, mul_hi); + + // Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0 (the saturating subtract is nonzero only when mul_hi > mul_lo; folding its high byte into the low byte and taking the byte-wise min with ones reduces that to 0 or 1 per 16-bit lane) + const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo); + const __m128i cmp = _mm_min_epu8( + _mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones); + + T = _mm_add_epi16(T, cmp); + + /* Selection: if X[i] is zero then assign 1-K + if K is zero then assign 1-X[i] + + Could if() off value of K_16 for the second, but this gives a + constant time implementation which is a nice bonus. + */ + + T = _mm_or_si128( + _mm_andnot_si128(X_is_zero, T), + _mm_and_si128(_mm_sub_epi16(ones, K), X_is_zero)); + + T = _mm_or_si128( + _mm_andnot_si128(K_is_zero, T), + _mm_and_si128(_mm_sub_epi16(ones, X), K_is_zero)); + + return T; + } + +/* +* 4x8 matrix transpose +* +* FIXME: why do I need the extra set of unpack_epi32 here? Inverse in +* transpose_out doesn't need it. Something with the shuffle? 
Removing +* that extra unpack could easily save 3-4 cycles per block, and would +* also help a lot with register pressure on 32-bit x86 +*/ +BOTAN_FUNC_ISA("sse2") +void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) + { + __m128i T0 = _mm_unpackhi_epi32(B0, B1); + __m128i T1 = _mm_unpacklo_epi32(B0, B1); + __m128i T2 = _mm_unpackhi_epi32(B2, B3); + __m128i T3 = _mm_unpacklo_epi32(B2, B3); + + __m128i T4 = _mm_unpacklo_epi32(T0, T1); + __m128i T5 = _mm_unpackhi_epi32(T0, T1); + __m128i T6 = _mm_unpacklo_epi32(T2, T3); + __m128i T7 = _mm_unpackhi_epi32(T2, T3); + + T0 = _mm_shufflehi_epi16(T4, _MM_SHUFFLE(1, 3, 0, 2)); + T1 = _mm_shufflehi_epi16(T5, _MM_SHUFFLE(1, 3, 0, 2)); + T2 = _mm_shufflehi_epi16(T6, _MM_SHUFFLE(1, 3, 0, 2)); + T3 = _mm_shufflehi_epi16(T7, _MM_SHUFFLE(1, 3, 0, 2)); + + T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(1, 3, 0, 2)); + T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(1, 3, 0, 2)); + T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(1, 3, 0, 2)); + T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(1, 3, 0, 2)); + + T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + B0 = _mm_unpacklo_epi64(T0, T2); + B1 = _mm_unpackhi_epi64(T0, T2); + B2 = _mm_unpacklo_epi64(T1, T3); + B3 = _mm_unpackhi_epi64(T1, T3); + } + +/* +* 4x8 matrix transpose (reverse); sse2_idea_op_8 applies it to its four registers just before storing the output +*/ +BOTAN_FUNC_ISA("sse2") +void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) + { + __m128i T0 = _mm_unpacklo_epi64(B0, B1); + __m128i T1 = _mm_unpacklo_epi64(B2, B3); + __m128i T2 = _mm_unpackhi_epi64(B0, B1); + __m128i T3 = _mm_unpackhi_epi64(B2, B3); + + T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + T0 = _mm_shufflehi_epi16(T0, _MM_SHUFFLE(3, 
1, 2, 0)); + T1 = _mm_shufflehi_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shufflehi_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shufflehi_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + B0 = _mm_unpacklo_epi32(T0, T1); + B1 = _mm_unpackhi_epi32(T0, T1); + B2 = _mm_unpacklo_epi32(T2, T3); + B3 = _mm_unpackhi_epi32(T2, T3); + } + +} + +/* +* 8 wide IDEA encryption/decryption in SSE2: handles 8 blocks (64 bytes) per call; encrypt_n passes m_EK and decrypt_n passes m_DK as EK +*/ +BOTAN_FUNC_ISA("sse2") +void IDEA::sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const + { + CT::poison(in, 64); + CT::poison(out, 64); + CT::poison(EK, 52); + + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + transpose_in(B0, B1, B2, B3); + + // byte swap within each 16-bit lane + B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8)); + B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8)); + B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8)); + B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8)); + + for(size_t i = 0; i != 8; ++i) + { + B0 = mul(B0, EK[6*i+0]); + B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[6*i+1])); + B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[6*i+2])); + B3 = mul(B3, EK[6*i+3]); + + __m128i T0 = B2; + B2 = _mm_xor_si128(B2, B0); + B2 = mul(B2, EK[6*i+4]); + + __m128i T1 = B1; + + B1 = _mm_xor_si128(B1, B3); + B1 = _mm_add_epi16(B1, B2); + B1 = mul(B1, EK[6*i+5]); + + B2 = _mm_add_epi16(B2, B1); + + B0 = _mm_xor_si128(B0, B1); + B1 = _mm_xor_si128(B1, T0); + B3 = _mm_xor_si128(B3, B2); + B2 = _mm_xor_si128(B2, T1); + } + + B0 = mul(B0, EK[48]); + B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[50])); + B2 = 
_mm_add_epi16(B2, _mm_set1_epi16(EK[49])); + B3 = mul(B3, EK[51]); + + // byte swap within each 16-bit lane + B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8)); + B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8)); + B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8)); + B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8)); + + transpose_out(B0, B2, B1, B3); + + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B2); + _mm_storeu_si128(out_mm + 2, B1); + _mm_storeu_si128(out_mm + 3, B3); + + CT::unpoison(in, 64); + CT::unpoison(out, 64); + CT::unpoison(EK, 52); + } + +} diff --git a/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt b/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt new file mode 100644 index 0000000000..b0ca2d02fa --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt @@ -0,0 +1,7 @@ +<defines> +IDEA_SSE2 -> 20131128 +</defines> + +<isa> +sse2 +</isa> diff --git a/comm/third_party/botan/src/lib/block/idea/info.txt b/comm/third_party/botan/src/lib/block/idea/info.txt new file mode 100644 index 0000000000..bcbdce03f1 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/info.txt @@ -0,0 +1,3 @@ +<defines> +IDEA -> 20131128 +</defines> |