diff options
Diffstat (limited to 'comm/third_party/botan/src/lib/block/serpent')
8 files changed, 1171 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/serpent/info.txt b/comm/third_party/botan/src/lib/block/serpent/info.txt new file mode 100644 index 0000000000..89b860ce4f --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/info.txt @@ -0,0 +1,11 @@ +<defines> +SERPENT -> 20131128 +</defines> + +<header:public> +serpent.h +</header:public> + +<header:internal> +serpent_sbox.h +</header:internal> diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent.cpp new file mode 100644 index 0000000000..ff37a177c7 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent.cpp @@ -0,0 +1,299 @@ +/* +* Serpent +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/serpent.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/internal/serpent_sbox.h> + +#if defined(BOTAN_HAS_SERPENT_SIMD) || defined(BOTAN_HAS_SERPENT_AVX2) + #include <botan/cpuid.h> +#endif + +namespace Botan { + +namespace { + +/* +* Serpent's Linear Transform +*/ +inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) + { + B0 = rotl<13>(B0); B2 = rotl<3>(B2); + B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); + B1 = rotl<1>(B1); B3 = rotl<7>(B3); + B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); + B0 = rotl<5>(B0); B2 = rotl<22>(B2); + } + +/* +* Serpent's Inverse Linear Transform +*/ +inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) + { + B2 = rotr<22>(B2); B0 = rotr<5>(B0); + B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; + B3 = rotr<7>(B3); B1 = rotr<1>(B1); + B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2; + B2 = rotr<3>(B2); B0 = rotr<13>(B0); + } + +} + +/* +* XOR a key block with a data block +*/ +#define key_xor(round, B0, B1, B2, B3) \ + B0 ^= m_round_key[4*round ]; \ + B1 ^= m_round_key[4*round+1]; \ + B2 ^= m_round_key[4*round+2]; \ + B3 ^= m_round_key[4*round+3]; + +/* +* Serpent Encryption +*/ +void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + +#if defined(BOTAN_HAS_SERPENT_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_encrypt_8(in, out); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SERPENT_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_encrypt_4(in, out); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) + { + uint32_t B0, B1, B2, B3; + load_le(in + 16*i, B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + store_le(out + 16*i, B0, B1, B2, B3); + } + } + +/* +* Serpent Decryption +*/ +void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + +#if defined(BOTAN_HAS_SERPENT_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_decrypt_8(in, out); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SERPENT_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_decrypt_4(in, out); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) + { + uint32_t B0, B1, B2, B3; + load_le(in + 16*i, B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + store_le(out + 16*i, B0, B1, B2, B3); + } + } + +#undef key_xor +#undef transform +#undef i_transform + +/* +* Serpent Key Schedule +*/ +void Serpent::key_schedule(const uint8_t key[], size_t length) + { + const uint32_t PHI = 0x9E3779B9; + + secure_vector<uint32_t> W(140); + for(size_t i = 0; i != length / 4; ++i) + W[i] = load_le<uint32_t>(key, i); + + W[length / 4] |= uint32_t(1) << ((length%4)*8); + + for(size_t i = 8; i != 140; ++i) + { + uint32_t wi = W[i-8] ^ W[i-5] ^ W[i-3] ^ W[i-1] ^ PHI ^ uint32_t(i-8); + W[i] = rotl<11>(wi); + } + + SBoxE0(W[ 20],W[ 21],W[ 22],W[ 23]); + SBoxE0(W[ 52],W[ 53],W[ 54],W[ 55]); + SBoxE0(W[ 84],W[ 85],W[ 86],W[ 87]); + SBoxE0(W[116],W[117],W[118],W[119]); + + SBoxE1(W[ 16],W[ 17],W[ 18],W[ 19]); + SBoxE1(W[ 48],W[ 49],W[ 50],W[ 51]); + SBoxE1(W[ 80],W[ 81],W[ 82],W[ 83]); + SBoxE1(W[112],W[113],W[114],W[115]); + + SBoxE2(W[ 12],W[ 13],W[ 14],W[ 15]); + SBoxE2(W[ 44],W[ 45],W[ 46],W[ 47]); + SBoxE2(W[ 76],W[ 77],W[ 78],W[ 79]); + SBoxE2(W[108],W[109],W[110],W[111]); + + SBoxE3(W[ 8],W[ 9],W[ 10],W[ 11]); + SBoxE3(W[ 40],W[ 41],W[ 42],W[ 43]); + SBoxE3(W[ 72],W[ 73],W[ 74],W[ 75]); + SBoxE3(W[104],W[105],W[106],W[107]); + SBoxE3(W[136],W[137],W[138],W[139]); + + SBoxE4(W[ 36],W[ 37],W[ 38],W[ 39]); + SBoxE4(W[ 68],W[ 69],W[ 70],W[ 71]); + SBoxE4(W[100],W[101],W[102],W[103]); + SBoxE4(W[132],W[133],W[134],W[135]); + + SBoxE5(W[ 32],W[ 33],W[ 34],W[ 35]); + SBoxE5(W[ 64],W[ 65],W[ 66],W[ 67]); + SBoxE5(W[ 96],W[ 97],W[ 98],W[ 99]); + SBoxE5(W[128],W[129],W[130],W[131]); + + SBoxE6(W[ 28],W[ 29],W[ 30],W[ 31]); + SBoxE6(W[ 60],W[ 61],W[ 62],W[ 63]); + SBoxE6(W[ 92],W[ 93],W[ 94],W[ 95]); + SBoxE6(W[124],W[125],W[126],W[127]); + + SBoxE7(W[ 24],W[ 25],W[ 26],W[ 27]); + SBoxE7(W[ 56],W[ 57],W[ 58],W[ 59]); + SBoxE7(W[ 88],W[ 89],W[ 90],W[ 91]); + SBoxE7(W[120],W[121],W[122],W[123]); + + m_round_key.assign(W.begin() + 8, W.end()); + } + +void Serpent::clear() + { + zap(m_round_key); + } + +std::string Serpent::provider() const + { +#if defined(BOTAN_HAS_SERPENT_AVX2) + if(CPUID::has_avx2()) + { + return "avx2"; + } +#endif + +#if defined(BOTAN_HAS_SERPENT_SIMD) + if(CPUID::has_simd_32()) + { + return "simd"; + } +#endif + + return "base"; + } + +#undef key_xor + +} diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent.h b/comm/third_party/botan/src/lib/block/serpent/serpent.h new file mode 100644 index 0000000000..64eb8a8b04 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent.h @@ -0,0 +1,53 @@ +/* +* Serpent +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SERPENT_H_ +#define BOTAN_SERPENT_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(serpent.h) + +namespace Botan { + +/** +* Serpent is the most conservative of the AES finalists +* https://www.cl.cam.ac.uk/~rja14/serpent.html +*/ +class BOTAN_PUBLIC_API(2,0) Serpent final : public Block_Cipher_Fixed_Params<16, 16, 32, 8> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string provider() const override; + std::string name() const override { return "Serpent"; } + BlockCipher* clone() const override { return new Serpent; } + + size_t parallelism() const override { return 4; } + + private: + +#if defined(BOTAN_HAS_SERPENT_SIMD) + void simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const; + void simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const; +#endif + +#if defined(BOTAN_HAS_SERPENT_AVX2) + void avx2_encrypt_8(const uint8_t in[64], uint8_t out[64]) const; + void avx2_decrypt_8(const uint8_t in[64], uint8_t out[64]) const; +#endif + + void key_schedule(const uint8_t key[], size_t length) override; + + secure_vector<uint32_t> m_round_key; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt new file mode 100644 index 0000000000..b0fbfb334e --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt @@ -0,0 +1,17 @@ +<defines> +SERPENT_AVX2 -> 20180824 +</defines> + +<isa> +avx2 +</isa> + +<requires> +simd_avx2 +</requires> + +# We must exclude MSVC due to #2120 +<cc> +gcc +clang +</cc> diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp new file mode 100644 index 0000000000..0db332035d --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp @@ -0,0 +1,169 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/serpent.h> +#include <botan/internal/serpent_sbox.h> +#include <botan/internal/simd_avx2.h> + +namespace Botan { + + +#define key_xor(round, B0, B1, B2, B3) \ + do { \ + B0 ^= SIMD_8x32::splat(m_round_key[4*round ]); \ + B1 ^= SIMD_8x32::splat(m_round_key[4*round+1]); \ + B2 ^= SIMD_8x32::splat(m_round_key[4*round+2]); \ + B3 ^= SIMD_8x32::splat(m_round_key[4*round+3]); \ + } while(0) + +/* +* Serpent's linear transformations +*/ +#define transform(B0, B1, B2, B3) \ + do { \ + B0 = B0.rotl<13>(); \ + B2 = B2.rotl<3>(); \ + B1 ^= B0 ^ B2; \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 = B1.rotl<1>(); \ + B3 = B3.rotl<7>(); \ + B0 ^= B1 ^ B3; \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 = B0.rotl<5>(); \ + B2 = B2.rotl<22>(); \ + } while(0) + +#define i_transform(B0, B1, B2, B3) \ + do { \ + B2 = B2.rotr<22>(); \ + B0 = B0.rotr<5>(); \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 ^= B1 ^ B3; \ + B3 = B3.rotr<7>(); \ + B1 = B1.rotr<1>(); \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 ^= B0 ^ B2; \ + B2 = B2.rotr<3>(); \ + B0 = B0.rotr<13>(); \ + } while(0) + +BOTAN_FUNC_ISA("avx2") +void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 B0 = SIMD_8x32::load_le(in); + SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32); + SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64); + SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96); + + SIMD_8x32::transpose(B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + SIMD_8x32::transpose(B0, B1, B2, B3); + B0.store_le(out); + B1.store_le(out + 32); + B2.store_le(out + 64); + B3.store_le(out + 96); + + SIMD_8x32::zero_registers(); + } + +BOTAN_FUNC_ISA("avx2") +void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 B0 = SIMD_8x32::load_le(in); + SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32); + SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64); + SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96); + + SIMD_8x32::transpose(B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + SIMD_8x32::transpose(B0, B1, B2, B3); + + B0.store_le(out); + B1.store_le(out + 32); + B2.store_le(out + 64); + B3.store_le(out + 96); + + SIMD_8x32::zero_registers(); + } + +#undef key_xor +#undef transform +#undef i_transform + +} diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h b/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h new file mode 100644 index 0000000000..31471e7247 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h @@ -0,0 +1,446 @@ +/* +* Serpent SBox Expressions +* (C) 1999-2007,2013 Jack Lloyd +* +* The sbox expressions used here were discovered by Dag Arne Osvik and +* are described in his paper "Speeding Up Serpent". +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SERPENT_SBOX_H_ +#define BOTAN_SERPENT_SBOX_H_ + +#include <botan/build.h> + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE0(T& a, T& b, T& c, T& d) + { + d ^= a; + T t0 = b; + b &= d; + t0 ^= c; + b ^= a; + a |= d; + a ^= t0; + t0 ^= d; + d ^= c; + c |= b; + c ^= t0; + t0 = ~t0; + t0 |= b; + b ^= d; + b ^= t0; + d |= a; + b ^= d; + t0 ^= d; + d = a; + a = b; + b = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE1(T& a, T& b, T& c, T& d) + { + a = ~a; + c = ~c; + T t0 = a; + a &= b; + c ^= a; + a |= d; + d ^= c; + b ^= a; + a ^= t0; + t0 |= b; + b ^= d; + c |= a; + c &= t0; + a ^= b; + b &= c; + b ^= a; + a &= c; + t0 ^= a; + a = c; + c = d; + d = b; + b = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE2(T& a, T& b, T& c, T& d) + { + T t0 = a; + a &= c; + a ^= d; + c ^= b; + c ^= a; + d |= t0; + d ^= b; + t0 ^= c; + b = d; + d |= t0; + d ^= a; + a &= b; + t0 ^= a; + b ^= d; + b ^= t0; + a = c; + c = b; + b = d; + d = ~t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE3(T& a, T& b, T& c, T& d) + { + T t0 = a; + a |= d; + d ^= b; + b &= t0; + t0 ^= c; + c ^= d; + d &= a; + t0 |= b; + d ^= t0; + a ^= b; + t0 &= a; + b ^= d; + t0 ^= c; + b |= a; + b ^= c; + a ^= d; + c = b; + b |= d; + a ^= b; + b = c; + c = d; + d = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE4(T& a, T& b, T& c, T& d) + { + b ^= d; + d = ~d; + c ^= d; + d ^= a; + T t0 = b; + b &= d; + b ^= c; + t0 ^= d; + a ^= t0; + c &= t0; + c ^= a; + a &= b; + d ^= a; + t0 |= b; + t0 ^= a; + a |= d; + a ^= c; + c &= d; + a = ~a; + t0 ^= c; + c = a; + a = b; + b = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE5(T& a, T& b, T& c, T& d) + { + a ^= b; + b ^= d; + d = ~d; + T t0 = b; + b &= a; + c ^= d; + b ^= c; + c |= t0; + t0 ^= d; + d &= b; + d ^= a; + t0 ^= b; + t0 ^= c; + c ^= a; + a &= d; + c = ~c; + a ^= t0; + t0 |= d; + t0 ^= c; + c = a; + a = b; + b = d; + d = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE6(T& a, T& b, T& c, T& d) + { + c = ~c; + T t0 = d; + d &= a; + a ^= t0; + d ^= c; + c |= t0; + b ^= d; + c ^= a; + a |= b; + c ^= b; + t0 ^= a; + a |= d; + a ^= c; + t0 ^= d; + t0 ^= a; + d = ~d; + c &= t0; + d ^= c; + c = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxE7(T& a, T& b, T& c, T& d) + { + T t0 = b; + b |= c; + b ^= d; + t0 ^= c; + c ^= b; + d |= t0; + d &= a; + t0 ^= c; + d ^= b; + b |= t0; + b ^= a; + a |= t0; + a ^= c; + b ^= t0; + c ^= b; + b &= a; + b ^= t0; + c = ~c; + c |= a; + t0 ^= c; + c = b; + b = d; + d = a; + a = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD0(T& a, T& b, T& c, T& d) + { + c = ~c; + T t0 = b; + b |= a; + t0 = ~t0; + b ^= c; + c |= t0; + b ^= d; + a ^= t0; + c ^= a; + a &= d; + t0 ^= a; + a |= b; + a ^= c; + d ^= t0; + c ^= b; + d ^= a; + d ^= b; + c &= d; + t0 ^= c; + c = b; + b = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD1(T& a, T& b, T& c, T& d) + { + T t0 = b; + b ^= d; + d &= b; + t0 ^= c; + d ^= a; + a |= b; + c ^= d; + a ^= t0; + a |= c; + b ^= d; + a ^= b; + b |= d; + b ^= a; + t0 = ~t0; + t0 ^= b; + b |= a; + b ^= a; + b |= t0; + d ^= b; + b = a; + a = t0; + t0 = c; + c = d; + d = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD2(T& a, T& b, T& c, T& d) + { + c ^= d; + d ^= a; + T t0 = d; + d &= c; + d ^= b; + b |= c; + b ^= t0; + t0 &= d; + c ^= d; + t0 &= a; + t0 ^= c; + c &= b; + c |= a; + d = ~d; + c ^= d; + a ^= d; + a &= b; + d ^= t0; + d ^= a; + a = b; + b = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD3(T& a, T& b, T& c, T& d) + { + T t0 = c; + c ^= b; + a ^= c; + t0 &= c; + t0 ^= a; + a &= b; + b ^= d; + d |= t0; + c ^= d; + a ^= d; + b ^= t0; + d &= c; + d ^= b; + b ^= a; + b |= c; + a ^= d; + b ^= t0; + a ^= b; + t0 = a; + a = c; + c = d; + d = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD4(T& a, T& b, T& c, T& d) + { + T t0 = c; + c &= d; + c ^= b; + b |= d; + b &= a; + t0 ^= c; + t0 ^= b; + b &= c; + a = ~a; + d ^= t0; + b ^= d; + d &= a; + d ^= c; + a ^= b; + c &= a; + d ^= a; + c ^= t0; + c |= d; + d ^= a; + c ^= b; + b = d; + d = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD5(T& a, T& b, T& c, T& d) + { + b = ~b; + T t0 = d; + c ^= b; + d |= a; + d ^= c; + c |= b; + c &= a; + t0 ^= d; + c ^= t0; + t0 |= a; + t0 ^= b; + b &= c; + b ^= d; + t0 ^= c; + d &= t0; + t0 ^= b; + d ^= t0; + t0 = ~t0; + d ^= a; + a = b; + b = t0; + t0 = d; + d = c; + c = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD6(T& a, T& b, T& c, T& d) + { + a ^= c; + T t0 = c; + c &= a; + t0 ^= d; + c = ~c; + d ^= b; + c ^= d; + t0 |= a; + a ^= c; + d ^= t0; + t0 ^= b; + b &= d; + b ^= a; + a ^= d; + a |= c; + d ^= b; + t0 ^= a; + a = b; + b = c; + c = t0; + } + +template<typename T> +BOTAN_FORCE_INLINE void SBoxD7(T& a, T& b, T& c, T& d) + { + T t0 = c; + c ^= a; + a &= d; + t0 |= d; + c = ~c; + d ^= b; + b |= a; + a ^= c; + c &= t0; + d &= t0; + b ^= c; + c ^= a; + a |= c; + t0 ^= b; + a ^= d; + d ^= t0; + t0 |= a; + d ^= c; + t0 ^= c; + c = b; + b = a; + a = d; + d = t0; + } + +#endif diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt new file mode 100644 index 0000000000..f7dadf33fc --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt @@ -0,0 +1,7 @@ +<defines> +SERPENT_SIMD -> 20160903 +</defines> + +<requires> +simd +</requires> diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp new file mode 100644 index 0000000000..8ac783ba5c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp @@ -0,0 +1,169 @@ +/* +* Serpent (SIMD) +* (C) 2009,2013 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/serpent.h> +#include <botan/internal/serpent_sbox.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +#define key_xor(round, B0, B1, B2, B3) \ + do { \ + B0 ^= SIMD_4x32::splat(m_round_key[4*round ]); \ + B1 ^= SIMD_4x32::splat(m_round_key[4*round+1]); \ + B2 ^= SIMD_4x32::splat(m_round_key[4*round+2]); \ + B3 ^= SIMD_4x32::splat(m_round_key[4*round+3]); \ + } while(0) + +/* +* Serpent's linear transformations +*/ +#define transform(B0, B1, B2, B3) \ + do { \ + B0 = B0.rotl<13>(); \ + B2 = B2.rotl<3>(); \ + B1 ^= B0 ^ B2; \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 = B1.rotl<1>(); \ + B3 = B3.rotl<7>(); \ + B0 ^= B1 ^ B3; \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 = B0.rotl<5>(); \ + B2 = B2.rotl<22>(); \ + } while(0) + +#define i_transform(B0, B1, B2, B3) \ + do { \ + B2 = B2.rotr<22>(); \ + B0 = B0.rotr<5>(); \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 ^= B1 ^ B3; \ + B3 = B3.rotr<7>(); \ + B1 = B1.rotr<1>(); \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 ^= B0 ^ B2; \ + B2 = B2.rotr<3>(); \ + B0 = B0.rotr<13>(); \ + } while(0) + +/* +* SIMD Serpent Encryption of 4 blocks in parallel +*/ +void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const + { + SIMD_4x32 B0 = SIMD_4x32::load_le(in); + SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); + SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); + SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + B0.store_le(out); + B1.store_le(out + 16); + B2.store_le(out + 32); + B3.store_le(out + 48); + } + +/* +* SIMD Serpent Decryption of 4 blocks in parallel +*/ +void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const + { + SIMD_4x32 B0 = SIMD_4x32::load_le(in); + SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); + SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); + SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + B0.store_le(out); + B1.store_le(out + 16); + B2.store_le(out + 32); + B3.store_le(out + 48); + } + +#undef key_xor +#undef transform +#undef i_transform + +} |