diff options
Diffstat (limited to 'comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp')
-rw-r--r-- | comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp new file mode 100644 index 0000000000..a465a38286 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp @@ -0,0 +1,122 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/internal/simd_avx2.h> + +namespace Botan { + +namespace { + +void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") + SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, + const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, + uint32_t RK) + { + H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK); + D += H; + H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + } + +void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") + SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, + const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, + uint32_t RK) + { + H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + D -= H; + H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK); + } + +} + +void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 A = SIMD_8x32::load_be(in); + SIMD_8x32 B = SIMD_8x32::load_be(in+32); + SIMD_8x32 C = SIMD_8x32::load_be(in+64); + SIMD_8x32 D = SIMD_8x32::load_be(in+96); + + SIMD_8x32 E = SIMD_8x32::load_be(in+128); + SIMD_8x32 F = SIMD_8x32::load_be(in+160); + SIMD_8x32 G = SIMD_8x32::load_be(in+192); + SIMD_8x32 H = SIMD_8x32::load_be(in+224); + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + A.store_be(out); + B.store_be(out+32); + C.store_be(out+64); + D.store_be(out+96); + + E.store_be(out+128); + F.store_be(out+160); + G.store_be(out+192); + H.store_be(out+224); + + SIMD_8x32::zero_registers(); + } + +BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 A = SIMD_8x32::load_be(in); + SIMD_8x32 B = SIMD_8x32::load_be(in+32); + SIMD_8x32 C = SIMD_8x32::load_be(in+64); + SIMD_8x32 D = SIMD_8x32::load_be(in+96); + + SIMD_8x32 E = SIMD_8x32::load_be(in+128); + SIMD_8x32 F = SIMD_8x32::load_be(in+160); + SIMD_8x32 G = SIMD_8x32::load_be(in+192); + SIMD_8x32 H = SIMD_8x32::load_be(in+224); + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + A.store_be(out); + B.store_be(out+32); + C.store_be(out+64); + D.store_be(out+96); + + E.store_be(out+128); + F.store_be(out+160); + G.store_be(out+192); + H.store_be(out+224); + + SIMD_8x32::zero_registers(); + } + +} |