summary refs log tree commit diff stats
path: root/comm/third_party/botan/src/lib/block/serpent
diff options
context:
space:
mode:
Diffstat (limited to 'comm/third_party/botan/src/lib/block/serpent')
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/info.txt 11
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent.cpp 299
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent.h 53
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt 17
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp 169
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h 446
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt 7
-rw-r--r-- comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp 169
8 files changed, 1171 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/serpent/info.txt b/comm/third_party/botan/src/lib/block/serpent/info.txt
new file mode 100644
index 0000000000..89b860ce4f
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/info.txt
@@ -0,0 +1,11 @@
+<defines>
+SERPENT -> 20131128
+</defines>
+
+<header:public>
+serpent.h
+</header:public>
+
+<header:internal>
+serpent_sbox.h
+</header:internal>
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent.cpp
new file mode 100644
index 0000000000..ff37a177c7
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent.cpp
@@ -0,0 +1,299 @@
+/*
+* Serpent
+* (C) 1999-2007 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/serpent.h>
+#include <botan/loadstor.h>
+#include <botan/rotate.h>
+#include <botan/internal/serpent_sbox.h>
+
+#if defined(BOTAN_HAS_SERPENT_SIMD) || defined(BOTAN_HAS_SERPENT_AVX2)
+ #include <botan/cpuid.h>
+#endif
+
+namespace Botan {
+
+namespace {
+
+/*
+* Serpent's Linear Transform: mixes the four 32-bit words B0..B3 in place using fixed rotations, XORs and two left shifts; encrypt_n applies it after the S-box in rounds 0 through 30
+*/
+inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
+ {
+ B0 = rotl<13>(B0); B2 = rotl<3>(B2);
+ B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); // << is a plain shift, not a rotate: the top bits of B0 are dropped
+ B1 = rotl<1>(B1); B3 = rotl<7>(B3);
+ B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); // uses the B1 and B3 values updated on the line above
+ B0 = rotl<5>(B0); B2 = rotl<22>(B2);
+ }
+
+/*
+* Serpent's Inverse Linear Transform: performs the steps of transform() in reverse order, with right rotations in place of left rotations, updating B0..B3 in place
+*/
+inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3)
+ {
+ B2 = rotr<22>(B2); B0 = rotr<5>(B0);
+ B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; // XOR is its own inverse, so the same expressions as in transform() are reused
+ B3 = rotr<7>(B3); B1 = rotr<1>(B1);
+ B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2;
+ B2 = rotr<3>(B2); B0 = rotr<13>(B0);
+ }
+
+}
+
+/*
+* XOR a key block with a data block
+*/
+#define key_xor(round, B0, B1, B2, B3) \
+ B0 ^= m_round_key[4*round ]; \
+ B1 ^= m_round_key[4*round+1]; \
+ B2 ^= m_round_key[4*round+2]; \
+ B3 ^= m_round_key[4*round+3];
+
+/*
+* Serpent Encryption: encrypts `blocks` consecutive 16-byte blocks from in[] into out[], using the 8-block AVX2 and 4-block SIMD routines first when they are compiled in and CPUID reports support
+*/
+void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_round_key.empty() == false); // NOTE(review): presumably rejects use before key_schedule() has filled m_round_key - confirm
+
+#if defined(BOTAN_HAS_SERPENT_AVX2)
+ if(CPUID::has_avx2())
+ {
+ while(blocks >= 8) // 8 blocks per call; any remainder falls through to the paths below
+ {
+ avx2_encrypt_8(in, out);
+ in += 8 * BLOCK_SIZE;
+ out += 8 * BLOCK_SIZE;
+ blocks -= 8;
+ }
+ }
+#endif
+
+#if defined(BOTAN_HAS_SERPENT_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ while(blocks >= 4) // 4 blocks per call
+ {
+ simd_encrypt_4(in, out);
+ in += 4 * BLOCK_SIZE;
+ out += 4 * BLOCK_SIZE;
+ blocks -= 4;
+ }
+ }
+#endif
+
+ BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) // scalar path: one 16-byte block per iteration for whatever is left
+ {
+ uint32_t B0, B1, B2, B3;
+ load_le(in + 16*i, B0, B1, B2, B3);
+
+ key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); // round r: round key r, S-box (r mod 8), linear transform
+ key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); // last round: no linear transform; round key 32 is mixed in instead
+
+ store_le(out + 16*i, B0, B1, B2, B3);
+ }
+ }
+
+/*
+* Serpent Decryption: decrypts `blocks` consecutive 16-byte blocks from in[] into out[], using the 8-block AVX2 and 4-block SIMD routines first when they are compiled in and CPUID reports support
+*/
+void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
+ {
+ verify_key_set(m_round_key.empty() == false); // NOTE(review): presumably rejects use before key_schedule() has filled m_round_key - confirm
+
+#if defined(BOTAN_HAS_SERPENT_AVX2)
+ if(CPUID::has_avx2())
+ {
+ while(blocks >= 8) // 8 blocks per call; any remainder falls through to the paths below
+ {
+ avx2_decrypt_8(in, out);
+ in += 8 * BLOCK_SIZE;
+ out += 8 * BLOCK_SIZE;
+ blocks -= 8;
+ }
+ }
+#endif
+
+#if defined(BOTAN_HAS_SERPENT_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ while(blocks >= 4) // 4 blocks per call
+ {
+ simd_decrypt_4(in, out);
+ in += 4 * BLOCK_SIZE;
+ out += 4 * BLOCK_SIZE;
+ blocks -= 4;
+ }
+ }
+#endif
+
+ BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) // scalar path: one 16-byte block per iteration for whatever is left
+ {
+ uint32_t B0, B1, B2, B3;
+ load_le(in + 16*i, B0, B1, B2, B3);
+
+ key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); // mirrors the last encryption round: key 32, S-box D7, key 31, no linear transform
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); // round r: inverse transform, S-box D(r mod 8), round key r
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);
+
+ store_le(out + 16*i, B0, B1, B2, B3);
+ }
+ }
+
+#undef key_xor
+#undef transform
+#undef i_transform
+
+/*
+* Serpent Key Schedule: expands a `length`-byte key into the 132 round-key words (33 round keys of 4 words each) that are stored in m_round_key
+*/
+void Serpent::key_schedule(const uint8_t key[], size_t length)
+ {
+ const uint32_t PHI = 0x9E3779B9; // constant XORed into every expanded word in the recurrence below
+
+ secure_vector<uint32_t> W(140); // W[0..7]: padded key words, W[8..139]: expanded words; the |= below relies on W starting zeroed (presumably guaranteed by secure_vector - confirm)
+ for(size_t i = 0; i != length / 4; ++i)
+ W[i] = load_le<uint32_t>(key, i);
+
+ W[length / 4] |= uint32_t(1) << ((length%4)*8); // padding: a single 1 bit directly after the last key byte; for a 32-byte key this lands in W[8], which the loop below overwrites
+
+ for(size_t i = 8; i != 140; ++i)
+ {
+ uint32_t wi = W[i-8] ^ W[i-5] ^ W[i-3] ^ W[i-1] ^ PHI ^ uint32_t(i-8);
+ W[i] = rotl<11>(wi);
+ }
+ // Each group of four expanded words goes through one S-box: W[8..11] (round key 0) uses SBoxE3, and the S-box index drops by one (mod 8) for each following round key.
+ SBoxE0(W[ 20],W[ 21],W[ 22],W[ 23]);
+ SBoxE0(W[ 52],W[ 53],W[ 54],W[ 55]);
+ SBoxE0(W[ 84],W[ 85],W[ 86],W[ 87]);
+ SBoxE0(W[116],W[117],W[118],W[119]);
+
+ SBoxE1(W[ 16],W[ 17],W[ 18],W[ 19]);
+ SBoxE1(W[ 48],W[ 49],W[ 50],W[ 51]);
+ SBoxE1(W[ 80],W[ 81],W[ 82],W[ 83]);
+ SBoxE1(W[112],W[113],W[114],W[115]);
+
+ SBoxE2(W[ 12],W[ 13],W[ 14],W[ 15]);
+ SBoxE2(W[ 44],W[ 45],W[ 46],W[ 47]);
+ SBoxE2(W[ 76],W[ 77],W[ 78],W[ 79]);
+ SBoxE2(W[108],W[109],W[110],W[111]);
+
+ SBoxE3(W[ 8],W[ 9],W[ 10],W[ 11]);
+ SBoxE3(W[ 40],W[ 41],W[ 42],W[ 43]);
+ SBoxE3(W[ 72],W[ 73],W[ 74],W[ 75]);
+ SBoxE3(W[104],W[105],W[106],W[107]);
+ SBoxE3(W[136],W[137],W[138],W[139]); // fifth SBoxE3 group: the words of round key 32
+
+ SBoxE4(W[ 36],W[ 37],W[ 38],W[ 39]);
+ SBoxE4(W[ 68],W[ 69],W[ 70],W[ 71]);
+ SBoxE4(W[100],W[101],W[102],W[103]);
+ SBoxE4(W[132],W[133],W[134],W[135]);
+
+ SBoxE5(W[ 32],W[ 33],W[ 34],W[ 35]);
+ SBoxE5(W[ 64],W[ 65],W[ 66],W[ 67]);
+ SBoxE5(W[ 96],W[ 97],W[ 98],W[ 99]);
+ SBoxE5(W[128],W[129],W[130],W[131]);
+
+ SBoxE6(W[ 28],W[ 29],W[ 30],W[ 31]);
+ SBoxE6(W[ 60],W[ 61],W[ 62],W[ 63]);
+ SBoxE6(W[ 92],W[ 93],W[ 94],W[ 95]);
+ SBoxE6(W[124],W[125],W[126],W[127]);
+
+ SBoxE7(W[ 24],W[ 25],W[ 26],W[ 27]);
+ SBoxE7(W[ 56],W[ 57],W[ 58],W[ 59]);
+ SBoxE7(W[ 88],W[ 89],W[ 90],W[ 91]);
+ SBoxE7(W[120],W[121],W[122],W[123]);
+
+ m_round_key.assign(W.begin() + 8, W.end()); // skip the 8 key words: m_round_key receives W[8..139]
+ }
+
+void Serpent::clear() // Discards the stored round keys.
+ {
+ zap(m_round_key); // NOTE(review): zap() presumably wipes and empties the vector, so the empty() check in encrypt_n/decrypt_n fails afterwards - confirm
+ }
+
+std::string Serpent::provider() const // Names the widest implementation available at run time: "avx2", "simd" or "base".
+ {
+#if defined(BOTAN_HAS_SERPENT_AVX2)
+ if(CPUID::has_avx2()) // same check, in the same order, as encrypt_n/decrypt_n use to pick a code path
+ {
+ return "avx2";
+ }
+#endif
+
+#if defined(BOTAN_HAS_SERPENT_SIMD)
+ if(CPUID::has_simd_32())
+ {
+ return "simd";
+ }
+#endif
+
+ return "base"; // scalar implementation only
+ }
+
+#undef key_xor
+
+}
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent.h b/comm/third_party/botan/src/lib/block/serpent/serpent.h
new file mode 100644
index 0000000000..64eb8a8b04
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent.h
@@ -0,0 +1,53 @@
+/*
+* Serpent
+* (C) 1999-2007 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SERPENT_H_
+#define BOTAN_SERPENT_H_
+
+#include <botan/block_cipher.h>
+
+BOTAN_FUTURE_INTERNAL_HEADER(serpent.h)
+
+namespace Botan {
+
+/**
+* Serpent is the most conservative of the AES finalists
+* https://www.cl.cam.ac.uk/~rja14/serpent.html
+*/
+class BOTAN_PUBLIC_API(2,0) Serpent final : public Block_Cipher_Fixed_Params<16, 16, 32, 8> // NOTE(review): presumably 16-byte blocks and 16..32-byte keys in 8-byte steps - confirm against block_cipher.h
+ {
+ public:
+ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; // process `blocks` consecutive 16-byte blocks
+ void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override;
+
+ void clear() override; // discards the round keys
+ std::string provider() const override; // "avx2", "simd" or "base"
+ std::string name() const override { return "Serpent"; }
+ BlockCipher* clone() const override { return new Serpent; } // new instance without key material; the caller owns the returned pointer
+
+ size_t parallelism() const override { return 4; }
+
+ private:
+
+#if defined(BOTAN_HAS_SERPENT_SIMD)
+ void simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const; // 4 blocks = 64 bytes
+ void simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const;
+#endif
+
+#if defined(BOTAN_HAS_SERPENT_AVX2)
+ void avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const; // 8 blocks = 128 bytes, matching the bounds used in the definition
+ void avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const;
+#endif
+
+ void key_schedule(const uint8_t key[], size_t length) override;
+
+ secure_vector<uint32_t> m_round_key; // 132 words (33 round keys of 4 words) once key_schedule() has run; empty otherwise
+ };
+
+}
+
+#endif
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt
new file mode 100644
index 0000000000..b0fbfb334e
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt
@@ -0,0 +1,17 @@
+<defines>
+SERPENT_AVX2 -> 20180824
+</defines>
+
+<isa>
+avx2
+</isa>
+
+<requires>
+simd_avx2
+</requires>
+
+# We must exclude MSVC due to #2120
+<cc>
+gcc
+clang
+</cc>
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
new file mode 100644
index 0000000000..0db332035d
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp
@@ -0,0 +1,169 @@
+/*
+* (C) 2018 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/serpent.h>
+#include <botan/internal/serpent_sbox.h>
+#include <botan/internal/simd_avx2.h>
+
+namespace Botan {
+
+
+#define key_xor(round, B0, B1, B2, B3) /* XOR round key `round` into B0..B3, with each key word broadcast via SIMD_8x32::splat */ \
+ do { \
+ B0 ^= SIMD_8x32::splat(m_round_key[4*round ]); \
+ B1 ^= SIMD_8x32::splat(m_round_key[4*round+1]); \
+ B2 ^= SIMD_8x32::splat(m_round_key[4*round+2]); \
+ B3 ^= SIMD_8x32::splat(m_round_key[4*round+3]); \
+ } while(0)
+
+/*
+* Serpent's linear transformations, written as macros over SIMD_8x32 values; transform() uses the same rotation and shift amounts as the scalar transform() in serpent.cpp
+*/
+#define transform(B0, B1, B2, B3) \
+ do { \
+ B0 = B0.rotl<13>(); \
+ B2 = B2.rotl<3>(); \
+ B1 ^= B0 ^ B2; \
+ B3 ^= B2 ^ B0.shl<3>(); \
+ B1 = B1.rotl<1>(); \
+ B3 = B3.rotl<7>(); \
+ B0 ^= B1 ^ B3; \
+ B2 ^= B3 ^ B1.shl<7>(); \
+ B0 = B0.rotl<5>(); \
+ B2 = B2.rotl<22>(); \
+ } while(0)
+
+#define i_transform(B0, B1, B2, B3) /* inverse of transform(): the same steps in reverse order, with rotr in place of rotl */ \
+ do { \
+ B2 = B2.rotr<22>(); \
+ B0 = B0.rotr<5>(); \
+ B2 ^= B3 ^ B1.shl<7>(); \
+ B0 ^= B1 ^ B3; \
+ B3 = B3.rotr<7>(); \
+ B1 = B1.rotr<1>(); \
+ B3 ^= B2 ^ B0.shl<3>(); \
+ B1 ^= B0 ^ B2; \
+ B2 = B2.rotr<3>(); \
+ B0 = B0.rotr<13>(); \
+ } while(0)
+
+BOTAN_FUNC_ISA("avx2")
+void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const // Encrypts 8 consecutive blocks (128 bytes) from in to out.
+ {
+ SIMD_8x32::reset_registers();
+
+ SIMD_8x32 B0 = SIMD_8x32::load_le(in); // four 32-byte loads cover the 128 input bytes
+ SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
+ SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
+ SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
+
+ SIMD_8x32::transpose(B0, B1, B2, B3); // NOTE(review): presumably regroups the data so Bk holds word k of every block - confirm in simd_avx2.h
+
+ key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); // same round sequence as the scalar loop in Serpent::encrypt_n
+ key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); // last round: no linear transform; round key 32 is mixed in instead
+
+ SIMD_8x32::transpose(B0, B1, B2, B3); // transpose again before storing, mirroring the step after the loads
+ B0.store_le(out);
+ B1.store_le(out + 32);
+ B2.store_le(out + 64);
+ B3.store_le(out + 96);
+
+ SIMD_8x32::zero_registers();
+ }
+
+BOTAN_FUNC_ISA("avx2")
+void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const // Decrypts 8 consecutive blocks (128 bytes) from in to out.
+ {
+ SIMD_8x32::reset_registers();
+
+ SIMD_8x32 B0 = SIMD_8x32::load_le(in); // four 32-byte loads cover the 128 input bytes
+ SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
+ SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
+ SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
+
+ SIMD_8x32::transpose(B0, B1, B2, B3); // NOTE(review): presumably regroups the data so Bk holds word k of every block - confirm in simd_avx2.h
+
+ key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); // same round sequence as the scalar loop in Serpent::decrypt_n
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3);
+
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3);
+
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3);
+
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);
+
+ SIMD_8x32::transpose(B0, B1, B2, B3); // transpose again before storing, mirroring the step after the loads
+
+ B0.store_le(out);
+ B1.store_le(out + 32);
+ B2.store_le(out + 64);
+ B3.store_le(out + 96);
+
+ SIMD_8x32::zero_registers();
+ }
+
+#undef key_xor
+#undef transform
+#undef i_transform
+
+}
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h b/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h
new file mode 100644
index 0000000000..31471e7247
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h
@@ -0,0 +1,446 @@
+/*
+* Serpent SBox Expressions
+* (C) 1999-2007,2013 Jack Lloyd
+*
+* The sbox expressions used here were discovered by Dag Arne Osvik and
+* are described in his paper "Speeding Up Serpent".
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#ifndef BOTAN_SERPENT_SBOX_H_
+#define BOTAN_SERPENT_SBOX_H_
+
+#include <botan/build.h>
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE0(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 0 (rounds 0, 8, 16, 24): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ d ^= a;
+ T t0 = b;
+ b &= d;
+ t0 ^= c;
+ b ^= a;
+ a |= d;
+ a ^= t0;
+ t0 ^= d;
+ d ^= c;
+ c |= b;
+ c ^= t0;
+ t0 = ~t0;
+ t0 |= b;
+ b ^= d;
+ b ^= t0;
+ d |= a;
+ b ^= d;
+ t0 ^= d;
+ d = a;
+ a = b;
+ b = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE1(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 1 (rounds 1, 9, 17, 25): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ a = ~a;
+ c = ~c;
+ T t0 = a;
+ a &= b;
+ c ^= a;
+ a |= d;
+ d ^= c;
+ b ^= a;
+ a ^= t0;
+ t0 |= b;
+ b ^= d;
+ c |= a;
+ c &= t0;
+ a ^= b;
+ b &= c;
+ b ^= a;
+ a &= c;
+ t0 ^= a;
+ a = c;
+ c = d;
+ d = b;
+ b = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE2(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 2 (rounds 2, 10, 18, 26): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = a;
+ a &= c;
+ a ^= d;
+ c ^= b;
+ c ^= a;
+ d |= t0;
+ d ^= b;
+ t0 ^= c;
+ b = d;
+ d |= t0;
+ d ^= a;
+ a &= b;
+ t0 ^= a;
+ b ^= d;
+ b ^= t0;
+ a = c;
+ c = b;
+ b = d;
+ d = ~t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE3(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 3 (rounds 3, 11, 19, 27): straight-line AND/OR/XOR code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = a;
+ a |= d;
+ d ^= b;
+ b &= t0;
+ t0 ^= c;
+ c ^= d;
+ d &= a;
+ t0 |= b;
+ d ^= t0;
+ a ^= b;
+ t0 &= a;
+ b ^= d;
+ t0 ^= c;
+ b |= a;
+ b ^= c;
+ a ^= d;
+ c = b;
+ b |= d;
+ a ^= b;
+ b = c;
+ c = d;
+ d = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE4(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 4 (rounds 4, 12, 20, 28): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ b ^= d;
+ d = ~d;
+ c ^= d;
+ d ^= a;
+ T t0 = b;
+ b &= d;
+ b ^= c;
+ t0 ^= d;
+ a ^= t0;
+ c &= t0;
+ c ^= a;
+ a &= b;
+ d ^= a;
+ t0 |= b;
+ t0 ^= a;
+ a |= d;
+ a ^= c;
+ c &= d;
+ a = ~a;
+ t0 ^= c;
+ c = a;
+ a = b;
+ b = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE5(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 5 (rounds 5, 13, 21, 29): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ a ^= b;
+ b ^= d;
+ d = ~d;
+ T t0 = b;
+ b &= a;
+ c ^= d;
+ b ^= c;
+ c |= t0;
+ t0 ^= d;
+ d &= b;
+ d ^= a;
+ t0 ^= b;
+ t0 ^= c;
+ c ^= a;
+ a &= d;
+ c = ~c;
+ a ^= t0;
+ t0 |= d;
+ t0 ^= c;
+ c = a;
+ a = b;
+ b = d;
+ d = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE6(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 6 (rounds 6, 14, 22, 30): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ c = ~c;
+ T t0 = d;
+ d &= a;
+ a ^= t0;
+ d ^= c;
+ c |= t0;
+ b ^= d;
+ c ^= a;
+ a |= b;
+ c ^= b;
+ t0 ^= a;
+ a |= d;
+ a ^= c;
+ t0 ^= d;
+ t0 ^= a;
+ d = ~d;
+ c &= t0;
+ d ^= c;
+ c = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxE7(T& a, T& b, T& c, T& d)
+ { // Encryption S-box 7 (rounds 7, 15, 23, 31): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = b;
+ b |= c;
+ b ^= d;
+ t0 ^= c;
+ c ^= b;
+ d |= t0;
+ d &= a;
+ t0 ^= c;
+ d ^= b;
+ b |= t0;
+ b ^= a;
+ a |= t0;
+ a ^= c;
+ b ^= t0;
+ c ^= b;
+ b &= a;
+ b ^= t0;
+ c = ~c;
+ c |= a;
+ t0 ^= c;
+ c = b;
+ b = d;
+ d = a;
+ a = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD0(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 0 (used with round keys 0, 8, 16, 24): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ c = ~c;
+ T t0 = b;
+ b |= a;
+ t0 = ~t0;
+ b ^= c;
+ c |= t0;
+ b ^= d;
+ a ^= t0;
+ c ^= a;
+ a &= d;
+ t0 ^= a;
+ a |= b;
+ a ^= c;
+ d ^= t0;
+ c ^= b;
+ d ^= a;
+ d ^= b;
+ c &= d;
+ t0 ^= c;
+ c = b;
+ b = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD1(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 1 (used with round keys 1, 9, 17, 25): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = b;
+ b ^= d;
+ d &= b;
+ t0 ^= c;
+ d ^= a;
+ a |= b;
+ c ^= d;
+ a ^= t0;
+ a |= c;
+ b ^= d;
+ a ^= b;
+ b |= d;
+ b ^= a;
+ t0 = ~t0;
+ t0 ^= b;
+ b |= a;
+ b ^= a;
+ b |= t0;
+ d ^= b;
+ b = a;
+ a = t0;
+ t0 = c;
+ c = d;
+ d = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD2(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 2 (used with round keys 2, 10, 18, 26): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ c ^= d;
+ d ^= a;
+ T t0 = d;
+ d &= c;
+ d ^= b;
+ b |= c;
+ b ^= t0;
+ t0 &= d;
+ c ^= d;
+ t0 &= a;
+ t0 ^= c;
+ c &= b;
+ c |= a;
+ d = ~d;
+ c ^= d;
+ a ^= d;
+ a &= b;
+ d ^= t0;
+ d ^= a;
+ a = b;
+ b = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD3(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 3 (used with round keys 3, 11, 19, 27): straight-line AND/OR/XOR code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = c;
+ c ^= b;
+ a ^= c;
+ t0 &= c;
+ t0 ^= a;
+ a &= b;
+ b ^= d;
+ d |= t0;
+ c ^= d;
+ a ^= d;
+ b ^= t0;
+ d &= c;
+ d ^= b;
+ b ^= a;
+ b |= c;
+ a ^= d;
+ b ^= t0;
+ a ^= b;
+ t0 = a;
+ a = c;
+ c = d;
+ d = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD4(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 4 (used with round keys 4, 12, 20, 28): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = c;
+ c &= d;
+ c ^= b;
+ b |= d;
+ b &= a;
+ t0 ^= c;
+ t0 ^= b;
+ b &= c;
+ a = ~a;
+ d ^= t0;
+ b ^= d;
+ d &= a;
+ d ^= c;
+ a ^= b;
+ c &= a;
+ d ^= a;
+ c ^= t0;
+ c |= d;
+ d ^= a;
+ c ^= b;
+ b = d;
+ d = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD5(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 5 (used with round keys 5, 13, 21, 29): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ b = ~b;
+ T t0 = d;
+ c ^= b;
+ d |= a;
+ d ^= c;
+ c |= b;
+ c &= a;
+ t0 ^= d;
+ c ^= t0;
+ t0 |= a;
+ t0 ^= b;
+ b &= c;
+ b ^= d;
+ t0 ^= c;
+ d &= t0;
+ t0 ^= b;
+ d ^= t0;
+ t0 = ~t0;
+ d ^= a;
+ a = b;
+ b = t0;
+ t0 = d;
+ d = c;
+ c = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD6(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 6 (used with round keys 6, 14, 22, 30): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ a ^= c;
+ T t0 = c;
+ c &= a;
+ t0 ^= d;
+ c = ~c;
+ d ^= b;
+ c ^= d;
+ t0 |= a;
+ a ^= c;
+ d ^= t0;
+ t0 ^= b;
+ b &= d;
+ b ^= a;
+ a ^= d;
+ a |= c;
+ d ^= b;
+ t0 ^= a;
+ a = b;
+ b = c;
+ c = t0;
+ }
+
+template<typename T>
+BOTAN_FORCE_INLINE void SBoxD7(T& a, T& b, T& c, T& d)
+ { // Decryption S-box 7 (used with round keys 7, 15, 23, 31): straight-line AND/OR/XOR/NOT code that overwrites a, b, c, d in place; t0 is the only temporary.
+ T t0 = c;
+ c ^= a;
+ a &= d;
+ t0 |= d;
+ c = ~c;
+ d ^= b;
+ b |= a;
+ a ^= c;
+ c &= t0;
+ d &= t0;
+ b ^= c;
+ c ^= a;
+ a |= c;
+ t0 ^= b;
+ a ^= d;
+ d ^= t0;
+ t0 |= a;
+ d ^= c;
+ t0 ^= c;
+ c = b;
+ b = a;
+ a = d;
+ d = t0;
+ }
+
+#endif
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt
new file mode 100644
index 0000000000..f7dadf33fc
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt
@@ -0,0 +1,7 @@
+<defines>
+SERPENT_SIMD -> 20160903
+</defines>
+
+<requires>
+simd
+</requires>
diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
new file mode 100644
index 0000000000..8ac783ba5c
--- /dev/null
+++ b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp
@@ -0,0 +1,169 @@
+/*
+* Serpent (SIMD)
+* (C) 2009,2013 Jack Lloyd
+*
+* Botan is released under the Simplified BSD License (see license.txt)
+*/
+
+#include <botan/serpent.h>
+#include <botan/internal/serpent_sbox.h>
+#include <botan/internal/simd_32.h>
+
+namespace Botan {
+
+#define key_xor(round, B0, B1, B2, B3) /* XOR round key `round` into B0..B3, with each key word broadcast via SIMD_4x32::splat */ \
+ do { \
+ B0 ^= SIMD_4x32::splat(m_round_key[4*round ]); \
+ B1 ^= SIMD_4x32::splat(m_round_key[4*round+1]); \
+ B2 ^= SIMD_4x32::splat(m_round_key[4*round+2]); \
+ B3 ^= SIMD_4x32::splat(m_round_key[4*round+3]); \
+ } while(0)
+
+/*
+* Serpent's linear transformations, written as macros over SIMD_4x32 values; transform() uses the same rotation and shift amounts as the scalar transform() in serpent.cpp
+*/
+#define transform(B0, B1, B2, B3) \
+ do { \
+ B0 = B0.rotl<13>(); \
+ B2 = B2.rotl<3>(); \
+ B1 ^= B0 ^ B2; \
+ B3 ^= B2 ^ B0.shl<3>(); \
+ B1 = B1.rotl<1>(); \
+ B3 = B3.rotl<7>(); \
+ B0 ^= B1 ^ B3; \
+ B2 ^= B3 ^ B1.shl<7>(); \
+ B0 = B0.rotl<5>(); \
+ B2 = B2.rotl<22>(); \
+ } while(0)
+
+#define i_transform(B0, B1, B2, B3) /* inverse of transform(): the same steps in reverse order, with rotr in place of rotl */ \
+ do { \
+ B2 = B2.rotr<22>(); \
+ B0 = B0.rotr<5>(); \
+ B2 ^= B3 ^ B1.shl<7>(); \
+ B0 ^= B1 ^ B3; \
+ B3 = B3.rotr<7>(); \
+ B1 = B1.rotr<1>(); \
+ B3 ^= B2 ^ B0.shl<3>(); \
+ B1 ^= B0 ^ B2; \
+ B2 = B2.rotr<3>(); \
+ B0 = B0.rotr<13>(); \
+ } while(0)
+
+/*
+* SIMD Serpent Encryption of 4 blocks in parallel: reads 64 bytes from in, runs the same 32-round sequence as the scalar code on SIMD_4x32 values, and writes 64 bytes to out
+*/
+void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const
+ {
+ SIMD_4x32 B0 = SIMD_4x32::load_le(in); // four 16-byte loads cover the 64 input bytes
+ SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16);
+ SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32);
+ SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48);
+
+ SIMD_4x32::transpose(B0, B1, B2, B3); // NOTE(review): presumably regroups the data so Bk holds word k of each of the 4 blocks - confirm in simd_32.h
+
+ key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
+ key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
+ key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+
+ key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
+ key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); // last round: no linear transform; round key 32 is mixed in instead
+
+ SIMD_4x32::transpose(B0, B1, B2, B3); // transpose again before storing, mirroring the step after the loads
+
+ B0.store_le(out);
+ B1.store_le(out + 16);
+ B2.store_le(out + 32);
+ B3.store_le(out + 48);
+ }
+
+/*
+* SIMD Serpent Decryption of 4 blocks in parallel: reads 64 bytes from in, runs the same inverse round sequence as the scalar code on SIMD_4x32 values, and writes 64 bytes to out
+*/
+void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const
+ {
+ SIMD_4x32 B0 = SIMD_4x32::load_le(in); // four 16-byte loads cover the 64 input bytes
+ SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16);
+ SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32);
+ SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48);
+
+ SIMD_4x32::transpose(B0, B1, B2, B3); // NOTE(review): presumably regroups the data so Bk holds word k of each of the 4 blocks - confirm in simd_32.h
+
+ key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); // mirrors the last encryption round: key 32, S-box D7, key 31, no linear transform
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3);
+
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3);
+
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3);
+
+ i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
+ i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);
+
+ SIMD_4x32::transpose(B0, B1, B2, B3); // transpose again before storing, mirroring the step after the loads
+
+ B0.store_le(out);
+ B1.store_le(out + 16);
+ B2.store_le(out + 32);
+ B3.store_le(out + 48);
+ }
+
+#undef key_xor
+#undef transform
+#undef i_transform
+
+}