summaryrefslogtreecommitdiffstats
path: root/comm/third_party/botan/src/lib/block/idea
diff options
context:
space:
mode:
Diffstat (limited to 'comm/third_party/botan/src/lib/block/idea')
-rw-r--r--comm/third_party/botan/src/lib/block/idea/idea.cpp240
-rw-r--r--comm/third_party/botan/src/lib/block/idea/idea.h45
-rw-r--r--comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp208
-rw-r--r--comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt7
-rw-r--r--comm/third_party/botan/src/lib/block/idea/info.txt3
5 files changed, 503 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/idea/idea.cpp b/comm/third_party/botan/src/lib/block/idea/idea.cpp
new file mode 100644
index 0000000000..f8f5ceb348
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/idea/idea.cpp
@@ -0,0 +1,240 @@
+/*
+* IDEA
+* (C) 1999-2010,2015 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/idea.h>
+#include <botan/loadstor.h>
+#include <botan/cpuid.h>
+#include <botan/internal/ct_utils.h>
+
+namespace Botan {
+
+namespace {
+
+/*
+* Multiplication modulo 65537
+*
+* Computes two candidate results without branching and picks one with a
+* CT::Mask built from "x*y == 0":
+*  - r_1 folds the 32-bit product P back to 16 bits. Since
+*    2^16 == -1 (mod 65537), P == P_lo - P_hi (mod 65537); when the
+*    subtraction wraps it has effectively added 2^16, so the carry adds
+*    the remaining 1.
+*  - r_2 = 1 - x - y (mod 2^16) is the result used when either operand is
+*    zero. NOTE(review): presumably because IDEA encodes 2^16 as 0 for
+*    this operation - confirm against the cipher specification.
+*
+* @param x first 16-bit operand
+* @param y second 16-bit operand
+* @return the selected 16-bit result
+*/
+inline uint16_t mul(uint16_t x, uint16_t y)
+ {
+ const uint32_t P = static_cast<uint32_t>(x) * y;
+ const auto P_mask = CT::Mask<uint16_t>(CT::Mask<uint32_t>::is_zero(P)); // mask reflects P == 0
+
+ const uint32_t P_hi = P >> 16;
+ const uint32_t P_lo = P & 0xFFFF;
+
+ const uint16_t carry = (P_lo < P_hi); // 1 if the subtraction below wraps, else 0
+ const uint16_t r_1 = static_cast<uint16_t>((P_lo - P_hi) + carry);
+ const uint16_t r_2 = 1 - x - y; // evaluated in int, then truncated to 16 bits
+
+ return P_mask.select(r_2, r_1); // NOTE(review): presumably r_2 where the mask is set, r_1 otherwise
+ }
+
+/*
+* Find multiplicative inverses modulo 65537
+*
+* 65537 is prime; thus Fermat's little theorem tells us that
+* x^65537 == x modulo 65537, which means
+* x^(65537-2) == x^-1 modulo 65537 since
+* x^(65537-2) * x == 1 mod 65537
+*
+* Do the exponentiation with a basic square and multiply: all bits
+* of the exponent are 1 so we always multiply
+*/
+uint16_t mul_inv(uint16_t x)
+   {
+   // Left-to-right square-and-multiply for the exponent 65537-2 = 2^16 - 1.
+   // Seeding the accumulator with x accounts for the leading exponent bit,
+   // which leaves 15 further square-then-multiply steps.
+   uint16_t acc = x;
+   size_t steps_left = 15;
+
+   while(steps_left != 0)
+      {
+      const uint16_t squared = mul(acc, acc);
+      acc = mul(squared, x);
+      --steps_left;
+      }
+
+   return acc;
+   }
+
+/**
+* IDEA is involutional, depending only on the key schedule
+*
+* Shared block routine used for both directions; the caller selects
+* encryption or decryption by passing the matching 52-entry subkey array.
+*
+* Each 8-byte block is loaded as four 16-bit words, run through 8 rounds
+* that consume 6 subkeys each (K[0..47]), and finished with an output
+* transformation using K[48..51]. The two middle words are exchanged on
+* output: X3 is stored before X2, and accordingly X2 takes K[50] and X3
+* takes K[49].
+*
+* The CT::poison/CT::unpoison calls bracket the computation.
+* NOTE(review): presumably these mark the buffers as secret for
+* constant-time checking tools - confirm in ct_utils.h.
+*
+* @param in input bytes; blocks * 8 bytes are covered
+* @param out output bytes; blocks * 8 bytes are covered
+* @param blocks number of 8-byte blocks to process
+* @param K the 52 16-bit subkeys
+*/
+void idea_op(const uint8_t in[], uint8_t out[], size_t blocks, const uint16_t K[52])
+ {
+ const size_t BLOCK_SIZE = 8;
+
+ CT::poison(in, blocks * 8);
+ CT::poison(out, blocks * 8);
+ CT::poison(K, 52);
+
+ BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i)
+ {
+ uint16_t X1, X2, X3, X4;
+ load_be(in + BLOCK_SIZE*i, X1, X2, X3, X4); // NOTE(review): presumably a big-endian load of four 16-bit words
+
+ for(size_t j = 0; j != 8; ++j)
+ {
+ X1 = mul(X1, K[6*j+0]);
+ X2 += K[6*j+1];
+ X3 += K[6*j+2];
+ X4 = mul(X4, K[6*j+3]);
+
+ const uint16_t T0 = X3; // X3 after key mixing; XORed into X2 at the end of the round
+ X3 = mul(X3 ^ X1, K[6*j+4]);
+
+ const uint16_t T1 = X2; // X2 after key mixing; XORed into X3 at the end of the round
+ X2 = mul((X2 ^ X4) + X3, K[6*j+5]);
+ X3 += X2;
+
+ X1 ^= X2;
+ X4 ^= X3;
+ X2 ^= T0;
+ X3 ^= T1;
+ }
+
+ X1 = mul(X1, K[48]);
+ X2 += K[50]; // indices 49/50 are crossed to match the X3-before-X2 store below
+ X3 += K[49];
+ X4 = mul(X4, K[51]);
+
+ store_be(out + BLOCK_SIZE*i, X1, X3, X2, X4);
+ }
+
+ CT::unpoison(in, blocks * 8);
+ CT::unpoison(out, blocks * 8);
+ CT::unpoison(K, 52);
+ }
+
+}
+
+size_t IDEA::parallelism() const
+   {
+   // One block at a time unless the SSE2 code is compiled in and the CPU
+   // reports SSE2, in which case the 8-block routine is available.
+   size_t blocks_at_once = 1;
+
+#if defined(BOTAN_HAS_IDEA_SSE2)
+   if(CPUID::has_sse2())
+      blocks_at_once = 8;
+#endif
+
+   return blocks_at_once;
+   }
+
+std::string IDEA::provider() const
+   {
+   // Reports "sse2" only when the SSE2 code is compiled in and the CPU
+   // reports SSE2; "base" in every other case.
+#if defined(BOTAN_HAS_IDEA_SSE2)
+   return CPUID::has_sse2() ? "sse2" : "base";
+#else
+   return "base";
+#endif
+   }
+
+/*
+* IDEA Encryption
+*
+* Processes `blocks` blocks from in to out using the encryption subkeys.
+* When the SSE2 code is compiled in and the CPU reports SSE2, groups of
+* 8 blocks go through sse2_idea_op_8; whatever is left (or everything,
+* otherwise) goes through the scalar idea_op.
+*/
+void IDEA::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+   {
+   verify_key_set(!m_EK.empty());
+
+   const uint16_t* subkeys = m_EK.data();
+
+#if defined(BOTAN_HAS_IDEA_SSE2)
+   if(CPUID::has_sse2())
+      {
+      const size_t wide = 8;
+
+      for(; blocks >= wide; blocks -= wide)
+         {
+         sse2_idea_op_8(in, out, subkeys);
+         in += wide * BLOCK_SIZE;
+         out += wide * BLOCK_SIZE;
+         }
+      }
+#endif
+
+   idea_op(in, out, blocks, subkeys);
+   }
+
+/*
+* IDEA Decryption
+*
+* Same data path as encryption, but driven by the decryption subkeys:
+* groups of 8 blocks go through sse2_idea_op_8 when the SSE2 code is
+* compiled in and the CPU reports SSE2, and the scalar idea_op handles
+* whatever is left.
+*/
+void IDEA::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+   {
+   verify_key_set(!m_DK.empty());
+
+   const uint16_t* subkeys = m_DK.data();
+
+#if defined(BOTAN_HAS_IDEA_SSE2)
+   if(CPUID::has_sse2())
+      {
+      const size_t wide = 8;
+
+      for(; blocks >= wide; blocks -= wide)
+         {
+         sse2_idea_op_8(in, out, subkeys);
+         in += wide * BLOCK_SIZE;
+         out += wide * BLOCK_SIZE;
+         }
+      }
+#endif
+
+   idea_op(in, out, blocks, subkeys);
+   }
+
+/*
+* IDEA Key Schedule
+*
+* Fills m_EK with the 52 encryption subkeys and derives the 52
+* decryption subkeys in m_DK from them.
+*
+* Encryption subkeys: the key is loaded as two 64-bit words that together
+* form a 128-bit value. Each pass of the outer loop emits eight 16-bit
+* subkeys (the four 16-bit slices of K[0], then of K[1], most significant
+* slice first) and then rotates the 128-bit value left by 25 bits. Six
+* passes give 48 subkeys; the last four are taken from K[0] after the
+* sixth rotation.
+*
+* Decryption subkeys: the encryption groups are walked in reverse order;
+* subkeys passed to mul() are replaced by their mul_inv(), subkeys that
+* are added are negated, and the pair at offsets +4/+5 of each group is
+* copied unchanged.
+*
+* @param key the key bytes; 16 bytes are covered by the poison/unpoison calls
+* (the unnamed size_t argument is not used)
+*/
+void IDEA::key_schedule(const uint8_t key[], size_t)
+ {
+ m_EK.resize(52);
+ m_DK.resize(52);
+
+ CT::poison(key, 16);
+ CT::poison(m_EK.data(), 52);
+ CT::poison(m_DK.data(), 52);
+
+ secure_vector<uint64_t> K(2);
+
+ K[0] = load_be<uint64_t>(key, 0);
+ K[1] = load_be<uint64_t>(key, 1);
+
+ for(size_t off = 0; off != 48; off += 8)
+ {
+ for(size_t i = 0; i != 8; ++i)
+ m_EK[off+i] = static_cast<uint16_t>(K[i/4] >> (48-16*(i % 4))); // i/4 picks the word, i%4 the 16-bit slice
+
+ const uint64_t Kx = (K[0] >> 39); // top 25 bits of K[0]
+ const uint64_t Ky = (K[1] >> 39); // top 25 bits of K[1]
+
+ K[0] = (K[0] << 25) | Ky; // together: 128-bit rotate left by 25
+ K[1] = (K[1] << 25) | Kx;
+ }
+
+ for(size_t i = 0; i != 4; ++i)
+ m_EK[48+i] = static_cast<uint16_t>(K[i/4] >> (48-16*(i % 4))); // i < 4, so only K[0] is read
+
+ m_DK[0] = mul_inv(m_EK[48]);
+ m_DK[1] = -m_EK[49];
+ m_DK[2] = -m_EK[50];
+ m_DK[3] = mul_inv(m_EK[51]);
+
+ for(size_t i = 0; i != 8*6; i += 6)
+ {
+ m_DK[i+4] = m_EK[46-i];
+ m_DK[i+5] = m_EK[47-i];
+ m_DK[i+6] = mul_inv(m_EK[42-i]);
+ m_DK[i+7] = -m_EK[44-i]; // additive pair is written crossed: 44 before 43
+ m_DK[i+8] = -m_EK[43-i];
+ m_DK[i+9] = mul_inv(m_EK[45-i]);
+ }
+
+ std::swap(m_DK[49], m_DK[50]); // the last group was written as [49] = -EK[2], [50] = -EK[1]; exchange them
+
+ CT::unpoison(key, 16);
+ CT::unpoison(m_EK.data(), 52);
+ CT::unpoison(m_DK.data(), 52);
+ }
+
+void IDEA::clear()
+   {
+   // Drop both subkey schedules; after this the emptiness checks in
+   // encrypt_n/decrypt_n see no key. NOTE(review): zap() presumably wipes
+   // the contents before releasing them - confirm in the library headers.
+   zap(m_DK);
+   zap(m_EK);
+   }
+
+}
diff --git a/comm/third_party/botan/src/lib/block/idea/idea.h b/comm/third_party/botan/src/lib/block/idea/idea.h
new file mode 100644
index 0000000000..e5e51606b9
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/idea/idea.h
@@ -0,0 +1,45 @@
+/*
+* IDEA
+* (C) 1999-2007 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_IDEA_H_
+#define BOTAN_IDEA_H_
+
+#include <botan/block_cipher.h>
+
+BOTAN_FUTURE_INTERNAL_HEADER(idea.h)
+
+namespace Botan {
+
+/**
+* IDEA
+*
+* Block cipher class built on Block_Cipher_Fixed_Params<8, 16>.
+* NOTE(review): the template arguments are presumably the block size and
+* key length in bytes - confirm in block_cipher.h.
+*
+* The object keeps two vectors of 16-bit subkeys, m_EK for encrypt_n and
+* m_DK for decrypt_n; both are filled by key_schedule and released by
+* clear.
+*/
+class BOTAN_PUBLIC_API(2,0) IDEA final : public Block_Cipher_Fixed_Params<8, 16>
+ {
+ public:
+ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+ void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+
+ void clear() override;
+
+ std::string provider() const override; // "sse2" or "base"
+ std::string name() const override { return "IDEA"; }
+ BlockCipher* clone() const override { return new IDEA; } // fresh, unkeyed instance owned by the caller
+ size_t parallelism() const override; // 8 when the SSE2 path is usable, otherwise 1
+
+ private:
+#if defined(BOTAN_HAS_IDEA_SSE2)
+ void sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const; // 8 blocks at once
+#endif
+
+ void key_schedule(const uint8_t[], size_t) override;
+
+ secure_vector<uint16_t> m_EK, m_DK; // encryption / decryption subkeys, 52 entries each once keyed
+ };
+
+}
+
+#endif
diff --git a/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp b/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp
new file mode 100644
index 0000000000..93648cfc7a
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp
@@ -0,0 +1,208 @@
+/*
+* IDEA in SSE2
+* (C) 2009 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/idea.h>
+#include <botan/internal/ct_utils.h>
+#include <emmintrin.h>
+
+namespace Botan {
+
+namespace {
+
+BOTAN_FUNC_ISA("sse2") // mul(): multiplication modulo 65537 of every 16-bit lane of X by the subkey K_16
+inline __m128i mul(__m128i X, uint16_t K_16)
+ {
+ const __m128i zeros = _mm_set1_epi16(0);
+ const __m128i ones = _mm_set1_epi16(1);
+
+ const __m128i K = _mm_set1_epi16(K_16); // subkey broadcast to all lanes
+
+ const __m128i X_is_zero = _mm_cmpeq_epi16(X, zeros); // all-ones in lanes where X == 0
+ const __m128i K_is_zero = _mm_cmpeq_epi16(K, zeros); // all-ones in every lane when K_16 == 0
+
+ const __m128i mul_lo = _mm_mullo_epi16(X, K); // low 16 bits of each product
+ const __m128i mul_hi = _mm_mulhi_epu16(X, K); // high 16 bits of each unsigned product
+
+ __m128i T = _mm_sub_epi16(mul_lo, mul_hi); // lo - hi, wrapping per lane
+
+ // Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0
+ const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo); // saturating, so nonzero only where mul_hi > mul_lo
+ const __m128i cmp = _mm_min_epu8( // fold the high byte into the low one, then clamp each lane to 0 or 1
+ _mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones);
+
+ T = _mm_add_epi16(T, cmp); // add the carry where the subtraction wrapped
+
+ /* Selection: if X[i] is zero then assign 1-K
+ if K is zero then assign 1-X[i]
+
+ Could if() off value of K_16 for the second, but this gives a
+ constant time implementation which is a nice bonus.
+ */
+
+ T = _mm_or_si128(
+ _mm_andnot_si128(X_is_zero, T),
+ _mm_and_si128(_mm_sub_epi16(ones, K), X_is_zero));
+
+ T = _mm_or_si128(
+ _mm_andnot_si128(K_is_zero, T),
+ _mm_and_si128(_mm_sub_epi16(ones, X), K_is_zero));
+
+ return T;
+ }
+
+/*
+* 4x8 matrix transpose
+*
+* Rearranges the four registers in place through two rounds of 32-bit
+* unpacks, 16-bit and 32-bit shuffles, and a final 64-bit unpack.
+* NOTE(review): judging by the caller, the input is eight consecutive
+* 8-byte blocks and afterwards B0..B3 each hold one word position
+* (first..fourth) of all eight blocks - not verified lane by lane here.
+*
+* FIXME: why do I need the extra set of unpack_epi32 here? Inverse in
+* transpose_out doesn't need it. Something with the shuffle? Removing
+* that extra unpack could easily save 3-4 cycles per block, and would
+* also help a lot with register pressure on 32-bit x86
+*/
+BOTAN_FUNC_ISA("sse2")
+void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3)
+ {
+ __m128i T0 = _mm_unpackhi_epi32(B0, B1);
+ __m128i T1 = _mm_unpacklo_epi32(B0, B1);
+ __m128i T2 = _mm_unpackhi_epi32(B2, B3);
+ __m128i T3 = _mm_unpacklo_epi32(B2, B3);
+
+ __m128i T4 = _mm_unpacklo_epi32(T0, T1); // second unpack round (the one the FIXME refers to)
+ __m128i T5 = _mm_unpackhi_epi32(T0, T1);
+ __m128i T6 = _mm_unpacklo_epi32(T2, T3);
+ __m128i T7 = _mm_unpackhi_epi32(T2, T3);
+
+ T0 = _mm_shufflehi_epi16(T4, _MM_SHUFFLE(1, 3, 0, 2)); // reorder the upper four 16-bit lanes
+ T1 = _mm_shufflehi_epi16(T5, _MM_SHUFFLE(1, 3, 0, 2));
+ T2 = _mm_shufflehi_epi16(T6, _MM_SHUFFLE(1, 3, 0, 2));
+ T3 = _mm_shufflehi_epi16(T7, _MM_SHUFFLE(1, 3, 0, 2));
+
+ T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(1, 3, 0, 2)); // same reordering for the lower four lanes
+ T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(1, 3, 0, 2));
+ T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(1, 3, 0, 2));
+ T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(1, 3, 0, 2));
+
+ T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0)); // exchange the two middle 32-bit lanes
+ T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0));
+ T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0));
+ T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0));
+
+ B0 = _mm_unpacklo_epi64(T0, T2); // results are written back through the reference parameters
+ B1 = _mm_unpackhi_epi64(T0, T2);
+ B2 = _mm_unpacklo_epi64(T1, T3);
+ B3 = _mm_unpackhi_epi64(T1, T3);
+ }
+
+/*
+* 4x8 matrix transpose (reverse)
+*
+* Rearranges the four registers in place through 64-bit unpacks, 32-bit
+* and 16-bit shuffles, and a final 32-bit unpack. NOTE(review): per the
+* title this is the reverse of transpose_in, i.e. it regroups
+* word-sliced registers into consecutive 8-byte blocks - not verified
+* lane by lane here.
+*/
+BOTAN_FUNC_ISA("sse2")
+void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3)
+ {
+ __m128i T0 = _mm_unpacklo_epi64(B0, B1);
+ __m128i T1 = _mm_unpacklo_epi64(B2, B3);
+ __m128i T2 = _mm_unpackhi_epi64(B0, B1);
+ __m128i T3 = _mm_unpackhi_epi64(B2, B3);
+
+ T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0)); // exchange the two middle 32-bit lanes
+ T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0));
+ T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0));
+ T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0));
+
+ T0 = _mm_shufflehi_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0)); // exchange the middle 16-bit lanes of the upper half
+ T1 = _mm_shufflehi_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0));
+ T2 = _mm_shufflehi_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0));
+ T3 = _mm_shufflehi_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0));
+
+ T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0)); // and of the lower half
+ T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0));
+ T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0));
+ T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0));
+
+ B0 = _mm_unpacklo_epi32(T0, T1); // results are written back through the reference parameters
+ B1 = _mm_unpackhi_epi32(T0, T1);
+ B2 = _mm_unpacklo_epi32(T2, T3);
+ B3 = _mm_unpackhi_epi32(T2, T3);
+ }
+
+}
+
+/*
+* 8 wide IDEA encryption/decryption in SSE2
+*
+* Loads 64 bytes (eight blocks) with unaligned loads, transposes them so
+* that each register holds one word position of all eight blocks, swaps
+* the bytes of every 16-bit lane, and then applies the same round
+* structure as the scalar idea_op: 8 rounds of 6 subkeys followed by an
+* output transformation with EK[48..51]. The result is byte swapped,
+* transposed back and written with unaligned stores.
+*
+* The direction is determined solely by the subkey array passed in.
+*
+* @param in 64 input bytes
+* @param out 64 output bytes
+* @param EK 52 16-bit subkeys
+*/
+BOTAN_FUNC_ISA("sse2")
+void IDEA::sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const
+ {
+ CT::poison(in, 64);
+ CT::poison(out, 64);
+ CT::poison(EK, 52);
+
+ const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
+
+ __m128i B0 = _mm_loadu_si128(in_mm + 0);
+ __m128i B1 = _mm_loadu_si128(in_mm + 1);
+ __m128i B2 = _mm_loadu_si128(in_mm + 2);
+ __m128i B3 = _mm_loadu_si128(in_mm + 3);
+
+ transpose_in(B0, B1, B2, B3);
+
+ // byte swap
+ B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8)); // exchanges the two bytes of each 16-bit lane
+ B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8));
+ B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8));
+ B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8));
+
+ for(size_t i = 0; i != 8; ++i)
+ {
+ B0 = mul(B0, EK[6*i+0]);
+ B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[6*i+1]));
+ B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[6*i+2]));
+ B3 = mul(B3, EK[6*i+3]);
+
+ __m128i T0 = B2; // B2 after key mixing; XORed into B1 at the end of the round
+ B2 = _mm_xor_si128(B2, B0);
+ B2 = mul(B2, EK[6*i+4]);
+
+ __m128i T1 = B1; // B1 after key mixing; XORed into B2 at the end of the round
+
+ B1 = _mm_xor_si128(B1, B3);
+ B1 = _mm_add_epi16(B1, B2);
+ B1 = mul(B1, EK[6*i+5]);
+
+ B2 = _mm_add_epi16(B2, B1);
+
+ B0 = _mm_xor_si128(B0, B1);
+ B1 = _mm_xor_si128(B1, T0);
+ B3 = _mm_xor_si128(B3, B2);
+ B2 = _mm_xor_si128(B2, T1);
+ }
+
+ B0 = mul(B0, EK[48]);
+ B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[50])); // indices 49/50 are crossed, matching the B2-before-B1 order below
+ B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[49]));
+ B3 = mul(B3, EK[51]);
+
+ // byte swap
+ B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8));
+ B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8));
+ B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8));
+ B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8));
+
+ transpose_out(B0, B2, B1, B3); // B2 and B1 are passed (and stored below) in exchanged order
+
+ __m128i* out_mm = reinterpret_cast<__m128i*>(out);
+
+ _mm_storeu_si128(out_mm + 0, B0);
+ _mm_storeu_si128(out_mm + 1, B2);
+ _mm_storeu_si128(out_mm + 2, B1);
+ _mm_storeu_si128(out_mm + 3, B3);
+
+ CT::unpoison(in, 64);
+ CT::unpoison(out, 64);
+ CT::unpoison(EK, 52);
+ }
+
+}
diff --git a/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt b/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt
new file mode 100644
index 0000000000..b0ca2d02fa
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt
@@ -0,0 +1,7 @@
+<defines>
+IDEA_SSE2 -> 20131128
+</defines>
+
+<isa>
+sse2
+</isa>
diff --git a/comm/third_party/botan/src/lib/block/idea/info.txt b/comm/third_party/botan/src/lib/block/idea/info.txt
new file mode 100644
index 0000000000..bcbdce03f1
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/idea/info.txt
@@ -0,0 +1,3 @@
+<defines>
+IDEA -> 20131128
+</defines>