diff options
Diffstat (limited to 'comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp')
-rw-r--r-- | comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp b/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp new file mode 100644 index 0000000000..83467a054c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp @@ -0,0 +1,143 @@ +/* +* Noekeon in SIMD +* (C) 2010 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/noekeon.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +/* +* Noekeon's Theta Operation +*/ +#define NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3) \ + do { \ + SIMD_4x32 T = A0 ^ A2; \ + T ^= T.rotl<8>() ^ T.rotr<8>(); \ + A1 ^= T; \ + A3 ^= T; \ + \ + A0 ^= K0; \ + A1 ^= K1; \ + A2 ^= K2; \ + A3 ^= K3; \ + \ + T = A1 ^ A3; \ + T ^= T.rotl<8>() ^ T.rotr<8>(); \ + A0 ^= T; \ + A2 ^= T; \ + } while(0) + +/* +* Noekeon's Gamma S-Box Layer +*/ +#define NOK_SIMD_GAMMA(A0, A1, A2, A3) \ + do \ + { \ + A1 ^= A3.andc(~A2); \ + A0 ^= A2 & A1; \ + \ + SIMD_4x32 T = A3; \ + A3 = A0; \ + A0 = T; \ + \ + A2 ^= A0 ^ A1 ^ A3; \ + \ + A1 ^= A3.andc(~A2); \ + A0 ^= A2 & A1; \ + } while(0) + +/* +* Noekeon Encryption +*/ +void Noekeon::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const + { + const SIMD_4x32 K0 = SIMD_4x32::splat(m_EK[0]); + const SIMD_4x32 K1 = SIMD_4x32::splat(m_EK[1]); + const SIMD_4x32 K2 = SIMD_4x32::splat(m_EK[2]); + const SIMD_4x32 K3 = SIMD_4x32::splat(m_EK[3]); + + SIMD_4x32 A0 = SIMD_4x32::load_be(in ); + SIMD_4x32 A1 = SIMD_4x32::load_be(in + 16); + SIMD_4x32 A2 = SIMD_4x32::load_be(in + 32); + SIMD_4x32 A3 = SIMD_4x32::load_be(in + 48); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + for(size_t i = 0; i != 16; ++i) + { + A0 ^= SIMD_4x32::splat(RC[i]); + + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + A1 = A1.rotl<1>(); + A2 = A2.rotl<5>(); + A3 = A3.rotl<2>(); + + NOK_SIMD_GAMMA(A0, A1, A2, A3); + + A1 = A1.rotr<1>(); + A2 = A2.rotr<5>(); + A3 = A3.rotr<2>(); + } + + A0 ^= SIMD_4x32::splat(RC[16]); + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + A0.store_be(out); + A1.store_be(out + 16); + A2.store_be(out + 32); + A3.store_be(out + 48); + } + +/* +* Noekeon Encryption +*/ +void Noekeon::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const + { + const SIMD_4x32 K0 = SIMD_4x32::splat(m_DK[0]); + const SIMD_4x32 K1 = SIMD_4x32::splat(m_DK[1]); + const SIMD_4x32 K2 = SIMD_4x32::splat(m_DK[2]); + const SIMD_4x32 K3 = SIMD_4x32::splat(m_DK[3]); + + SIMD_4x32 A0 = SIMD_4x32::load_be(in ); + SIMD_4x32 A1 = SIMD_4x32::load_be(in + 16); + SIMD_4x32 A2 = SIMD_4x32::load_be(in + 32); + SIMD_4x32 A3 = SIMD_4x32::load_be(in + 48); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + for(size_t i = 0; i != 16; ++i) + { + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + A0 ^= SIMD_4x32::splat(RC[16-i]); + + A1 = A1.rotl<1>(); + A2 = A2.rotl<5>(); + A3 = A3.rotl<2>(); + + NOK_SIMD_GAMMA(A0, A1, A2, A3); + + A1 = A1.rotr<1>(); + A2 = A2.rotr<5>(); + A3 = A3.rotr<2>(); + } + + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + A0 ^= SIMD_4x32::splat(RC[0]); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + A0.store_be(out); + A1.store_be(out + 16); + A2.store_be(out + 32); + A3.store_be(out + 48); + } + +} |