diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/third_party/botan/src/lib/block/shacal2 | |
parent | Initial commit. (diff) | |
download | thunderbird-upstream.tar.xz thunderbird-upstream.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/third_party/botan/src/lib/block/shacal2')
9 files changed, 737 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/shacal2/info.txt b/comm/third_party/botan/src/lib/block/shacal2/info.txt new file mode 100644 index 0000000000..62e00503f9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/info.txt @@ -0,0 +1,5 @@ +<defines> +SHACAL2 -> 20170813 +</defines> + + diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2.cpp new file mode 100644 index 0000000000..b0c57f2359 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2.cpp @@ -0,0 +1,280 @@ +/* +* SHACAL-2 +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/cpuid.h> + +namespace Botan { + +namespace { + +inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, + uint32_t E, uint32_t F, uint32_t G, uint32_t& H, + uint32_t RK) + { + const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); + const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); + + H += E_rho + ((E & F) ^ (~E & G)) + RK; + D += H; + H += A_rho + ((A & B) | ((A | B) & C)); + } + +inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, + uint32_t E, uint32_t F, uint32_t G, uint32_t& H, + uint32_t RK) + { + const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); + const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); + + H -= A_rho + ((A & B) | ((A | B) & C)); + D -= H; + H -= E_rho + ((E & F) ^ (~E & G)) + RK; + } + +} + +/* +* SHACAL2 Encryption +*/ +void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return x86_encrypt_blocks(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_encrypt_8(in, out); + in += 8*BLOCK_SIZE; + out += 8*BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_encrypt_4(in, out); + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + uint32_t E = load_be<uint32_t>(in, 4); + uint32_t F = load_be<uint32_t>(in, 5); + uint32_t G = load_be<uint32_t>(in, 6); + uint32_t H = load_be<uint32_t>(in, 7); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + store_be(out, A, B, C, D, E, F, G, H); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SHACAL2 Encryption +*/ +void SHACAL2::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_decrypt_8(in, out); + in += 8*BLOCK_SIZE; + out += 8*BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_decrypt_4(in, out); + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + uint32_t E = load_be<uint32_t>(in, 4); + uint32_t F = load_be<uint32_t>(in, 5); + uint32_t G = load_be<uint32_t>(in, 6); + uint32_t H = load_be<uint32_t>(in, 7); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + store_be(out, A, B, C, D, E, F, G, H); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SHACAL2 Key Schedule +*/ +void SHACAL2::key_schedule(const uint8_t key[], size_t len) + { + const uint32_t RC[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 + }; + + if(m_RK.empty()) + m_RK.resize(64); + else + clear_mem(m_RK.data(), m_RK.size()); + + load_be(m_RK.data(), key, len/4); + + for(size_t i = 16; i != 64; ++i) + { + const uint32_t sigma0_15 = rotr< 7>(m_RK[i-15]) ^ rotr<18>(m_RK[i-15]) ^ (m_RK[i-15] >> 3); + const uint32_t sigma1_2 = rotr<17>(m_RK[i- 2]) ^ rotr<19>(m_RK[i- 2]) ^ (m_RK[i- 2] >> 10); + m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2; + } + + for(size_t i = 0; i != 64; ++i) + { + m_RK[i] += RC[i]; + } + } + +size_t SHACAL2::parallelism() const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return 4; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + return 8; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + return 4; + } +#endif + + return 1; + } + +std::string SHACAL2::provider() const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return "intel_sha"; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + return "avx2"; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + return "simd"; + } +#endif + + return "base"; + } + +/* +* Clear memory of sensitive data +*/ +void SHACAL2::clear() + { + zap(m_RK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2.h b/comm/third_party/botan/src/lib/block/shacal2/shacal2.h new file mode 100644 index 0000000000..b752a03390 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2.h @@ -0,0 +1,54 @@ +/* +* SHACAL-2 +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SHACAL2_H_ +#define BOTAN_SHACAL2_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(shacal2.h) + +namespace Botan { + +/** +* SHACAL2 +*/ +class BOTAN_PUBLIC_API(2,3) SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + std::string provider() const override; + void clear() override; + std::string name() const override { return "SHACAL2"; } + BlockCipher* clone() const override { return new SHACAL2; } + size_t parallelism() const override; + + private: + void key_schedule(const uint8_t[], size_t) override; + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const; + void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const; +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + void avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const; + void avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const; +#endif + +#if defined(BOTAN_HAS_SHACAL2_X86) + void x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + secure_vector<uint32_t> m_RK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/info.txt b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/info.txt new file mode 100644 index 0000000000..a0b5ce1a97 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/info.txt @@ -0,0 +1,11 @@ +<defines> +SHACAL2_AVX2 -> 20180826 +</defines> + +<isa> +avx2 +</isa> + +<requires> +simd_avx2 +</requires> diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp new file mode 100644 index 0000000000..a465a38286 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp @@ -0,0 +1,122 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/internal/simd_avx2.h> + +namespace Botan { + +namespace { + +void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") + SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, + const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, + uint32_t RK) + { + H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK); + D += H; + H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + } + +void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") + SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, + const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, + uint32_t RK) + { + H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + D -= H; + H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK); + } + +} + +void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 A = SIMD_8x32::load_be(in); + SIMD_8x32 B = SIMD_8x32::load_be(in+32); + SIMD_8x32 C = SIMD_8x32::load_be(in+64); + SIMD_8x32 D = SIMD_8x32::load_be(in+96); + + SIMD_8x32 E = SIMD_8x32::load_be(in+128); + SIMD_8x32 F = SIMD_8x32::load_be(in+160); + SIMD_8x32 G = SIMD_8x32::load_be(in+192); + SIMD_8x32 H = SIMD_8x32::load_be(in+224); + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + A.store_be(out); + B.store_be(out+32); + C.store_be(out+64); + D.store_be(out+96); + + E.store_be(out+128); + F.store_be(out+160); + G.store_be(out+192); + H.store_be(out+224); + + SIMD_8x32::zero_registers(); + } + +BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 A = SIMD_8x32::load_be(in); + SIMD_8x32 B = SIMD_8x32::load_be(in+32); + SIMD_8x32 C = SIMD_8x32::load_be(in+64); + SIMD_8x32 D = SIMD_8x32::load_be(in+96); + + SIMD_8x32 E = SIMD_8x32::load_be(in+128); + SIMD_8x32 F = SIMD_8x32::load_be(in+160); + SIMD_8x32 G = SIMD_8x32::load_be(in+192); + SIMD_8x32 H = SIMD_8x32::load_be(in+224); + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + A.store_be(out); + B.store_be(out+32); + C.store_be(out+64); + D.store_be(out+96); + + E.store_be(out+128); + F.store_be(out+160); + G.store_be(out+192); + H.store_be(out+224); + + SIMD_8x32::zero_registers(); + } + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/info.txt b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/info.txt new file mode 100644 index 0000000000..8d715c668c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/info.txt @@ -0,0 +1,8 @@ +<defines> +SHACAL2_SIMD -> 20170813 +</defines> + +<requires> +shacal2 +simd +</requires> diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp new file mode 100644 index 0000000000..6d15faf1a6 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp @@ -0,0 +1,119 @@ +/* +* SHACAL-2 using SIMD +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +namespace { + +inline +void SHACAL2_Fwd(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D, + const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H, + uint32_t RK) + { + H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK); + D += H; + H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + } + +inline +void SHACAL2_Rev(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D, + const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H, + uint32_t RK) + { + H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + D -= H; + H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK); + } + +} + +void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const + { + SIMD_4x32 A = SIMD_4x32::load_be(in); + SIMD_4x32 E = SIMD_4x32::load_be(in+16); + SIMD_4x32 B = SIMD_4x32::load_be(in+32); + SIMD_4x32 F = SIMD_4x32::load_be(in+48); + + SIMD_4x32 C = SIMD_4x32::load_be(in+64); + SIMD_4x32 G = SIMD_4x32::load_be(in+80); + SIMD_4x32 D = SIMD_4x32::load_be(in+96); + SIMD_4x32 H = SIMD_4x32::load_be(in+112); + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + A.store_be(out); + E.store_be(out+16); + B.store_be(out+32); + F.store_be(out+48); + + C.store_be(out+64); + G.store_be(out+80); + D.store_be(out+96); + H.store_be(out+112); + } + +void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const + { + SIMD_4x32 A = SIMD_4x32::load_be(in); + SIMD_4x32 E = SIMD_4x32::load_be(in+16); + SIMD_4x32 B = SIMD_4x32::load_be(in+32); + SIMD_4x32 F = SIMD_4x32::load_be(in+48); + + SIMD_4x32 C = SIMD_4x32::load_be(in+64); + SIMD_4x32 G = SIMD_4x32::load_be(in+80); + SIMD_4x32 D = SIMD_4x32::load_be(in+96); + SIMD_4x32 H = SIMD_4x32::load_be(in+112); + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + A.store_be(out); + E.store_be(out+16); + B.store_be(out+32); + F.store_be(out+48); + + C.store_be(out+64); + G.store_be(out+80); + D.store_be(out+96); + H.store_be(out+112); + } + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/info.txt b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/info.txt new file mode 100644 index 0000000000..2988330482 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/info.txt @@ -0,0 +1,20 @@ +<defines> +SHACAL2_X86 -> 20170814 +</defines> + +<requires> +shacal2 +</requires> + +<isa> +sha +sse2 +ssse3 +</isa> + +<cc> +gcc:5.0 +clang:3.9 +msvc:19.0 # MSVS 2015 +</cc> + diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp new file mode 100644 index 0000000000..1611d6c9b6 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp @@ -0,0 +1,118 @@ +/* +* SHACAL-2 using x86 SHA extensions +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <immintrin.h> + +namespace Botan { + +/* +Only encryption is supported since the inverse round function would +require a different instruction +*/ + +BOTAN_FUNC_ISA("sha,ssse3") +void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i MASK1 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); + const __m128i MASK2 = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); + + const __m128i* RK_mm = reinterpret_cast<const __m128i*>(m_RK.data()); + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + while(blocks >= 2) + { + __m128i B0_0 = _mm_loadu_si128(in_mm); + __m128i B0_1 = _mm_loadu_si128(in_mm+1); + __m128i B1_0 = _mm_loadu_si128(in_mm+2); + __m128i B1_1 = _mm_loadu_si128(in_mm+3); + + __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0_0, B0_1), MASK2); + B0_1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0_0, B0_1), MASK2); + B0_0 = TMP; + + TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B1_0, B1_1), MASK2); + B1_1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B1_0, B1_1), MASK2); + B1_0 = TMP; + + for(size_t i = 0; i != 8; ++i) + { + const __m128i RK0 = _mm_loadu_si128(RK_mm + 2*i); + const __m128i RK2 = _mm_loadu_si128(RK_mm + 2*i+1); + const __m128i RK1 = _mm_srli_si128(RK0, 8); + const __m128i RK3 = _mm_srli_si128(RK2, 8); + + B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK0); + B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK0); + + B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK1); + B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK1); + + B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK2); + B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK2); + + B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK3); + B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK3); + } + + TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0_0, B0_1), MASK1); + B0_1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0_0, B0_1), MASK1); + B0_0 = TMP; + + TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B1_0, B1_1), MASK1); + B1_1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B1_0, B1_1), MASK1); + B1_0 = TMP; + + // Save state + _mm_storeu_si128(out_mm + 0, B0_0); + _mm_storeu_si128(out_mm + 1, B0_1); + _mm_storeu_si128(out_mm + 2, B1_0); + _mm_storeu_si128(out_mm + 3, B1_1); + + blocks -= 2; + in_mm += 4; + out_mm += 4; + } + + while(blocks) + { + __m128i B0 = _mm_loadu_si128(in_mm); + __m128i B1 = _mm_loadu_si128(in_mm+1); + + __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2); + B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2); + B0 = TMP; + + for(size_t i = 0; i != 8; ++i) + { + const __m128i RK0 = _mm_loadu_si128(RK_mm + 2*i); + const __m128i RK2 = _mm_loadu_si128(RK_mm + 2*i+1); + const __m128i RK1 = _mm_srli_si128(RK0, 8); + const __m128i RK3 = _mm_srli_si128(RK2, 8); + + B1 = _mm_sha256rnds2_epu32(B1, B0, RK0); + B0 = _mm_sha256rnds2_epu32(B0, B1, RK1); + B1 = _mm_sha256rnds2_epu32(B1, B0, RK2); + B0 = _mm_sha256rnds2_epu32(B0, B1, RK3); + } + + TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1); + B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1); + B0 = TMP; + + // Save state + _mm_storeu_si128(out_mm, B0); + _mm_storeu_si128(out_mm + 1, B1); + + blocks--; + in_mm += 2; + out_mm += 2; + } + } + +} |