diff options
Diffstat (limited to 'comm/third_party/botan/src/lib/block')
99 files changed, 15049 insertions, 0 deletions
diff --git a/comm/third_party/botan/src/lib/block/aes/aes.cpp b/comm/third_party/botan/src/lib/block/aes/aes.cpp new file mode 100644 index 0000000000..88d6e9027f --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes.cpp @@ -0,0 +1,1017 @@ +/* +* (C) 1999-2010,2015,2017,2018,2020 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/loadstor.h> +#include <botan/cpuid.h> +#include <botan/rotate.h> +#include <botan/internal/bit_ops.h> +#include <botan/internal/ct_utils.h> + +namespace Botan { + +#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI) + #define BOTAN_HAS_HW_AES_SUPPORT +#endif + +/* +* One of three AES implementation strategies is used to get a constant time +* implementation which is immune to common cache/timing based side channels: +* +* - If AES hardware support is available (AES-NI, POWER8, Aarch64) use that +* +* - If 128-bit SIMD with byte shuffles is available (SSSE3, NEON, or Altivec), +* use the vperm technique published by Mike Hamburg at CHES 2009. +* +* - If no hardware or SIMD support, fall back to a constant time bitsliced +* implementation. This uses 32-bit words resulting in 2 blocks being processed +* in parallel. Moving to 4 blocks (with 64-bit words) would approximately +* double performance on 64-bit CPUs. Likewise moving to 128 bit SIMD would +* again approximately double performance vs 64-bit. However the assumption is +* that most 64-bit CPUs either have hardware AES or SIMD shuffle support and +* that the majority of users falling back to this code will be 32-bit cores. +* If this assumption proves to be unsound, the bitsliced code can easily be +* extended to operate on either 32 or 64 bit words depending on the native +* wordsize of the target processor. 
+* +* Useful references +* +* - "Accelerating AES with Vector Permute Instructions" Mike Hamburg +* https://www.shiftleft.org/papers/vector_aes/vector_aes.pdf +* +* - "Faster and Timing-Attack Resistant AES-GCM" Käsper and Schwabe +* https://eprint.iacr.org/2009/129.pdf +* +* - "A new combinational logic minimization technique with applications to cryptology." +* Boyar and Peralta https://eprint.iacr.org/2009/191.pdf +* +* - "A depth-16 circuit for the AES S-box" Boyar and Peralta +* https://eprint.iacr.org/2011/332.pdf +* +* - "A Very Compact S-box for AES" Canright +* https://www.iacr.org/archive/ches2005/032.pdf +* https://core.ac.uk/download/pdf/36694529.pdf (extended) +*/ + +namespace { + +/* +This is an AES sbox circuit which can execute in bitsliced mode up to 32x in +parallel. + +The circuit is from the "Circuit Minimization Team" group +http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html +http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt + +This circuit has size 113 and depth 27. In software it is much faster than +circuits which are considered faster for hardware purposes (where circuit depth +is the critical constraint), because unlike in hardware, on common CPUs we can +only execute - at best - 3 or 4 logic operations per cycle. So a smaller circuit +is superior. On an x86-64 machine this circuit is about 15% faster than the +circuit of size 128 and depth 16 given in "A depth-16 circuit for the AES S-box". + +Another circuit for AES Sbox of size 102 and depth 24 is described in "New +Circuit Minimization Techniques for Smaller and Faster AES SBoxes" +[https://eprint.iacr.org/2019/802] however it relies on "non-standard" gates +like MUX, NOR, NAND, etc and so in practice in bitsliced software, its size is +actually a bit larger than this circuit, as few CPUs have such instructions and +otherwise they must be emulated using a sequence of available bit operations. 
+*/ +void AES_SBOX(uint32_t V[8]) + { + const uint32_t U0 = V[0]; + const uint32_t U1 = V[1]; + const uint32_t U2 = V[2]; + const uint32_t U3 = V[3]; + const uint32_t U4 = V[4]; + const uint32_t U5 = V[5]; + const uint32_t U6 = V[6]; + const uint32_t U7 = V[7]; + + const uint32_t y14 = U3 ^ U5; + const uint32_t y13 = U0 ^ U6; + const uint32_t y9 = U0 ^ U3; + const uint32_t y8 = U0 ^ U5; + const uint32_t t0 = U1 ^ U2; + const uint32_t y1 = t0 ^ U7; + const uint32_t y4 = y1 ^ U3; + const uint32_t y12 = y13 ^ y14; + const uint32_t y2 = y1 ^ U0; + const uint32_t y5 = y1 ^ U6; + const uint32_t y3 = y5 ^ y8; + const uint32_t t1 = U4 ^ y12; + const uint32_t y15 = t1 ^ U5; + const uint32_t y20 = t1 ^ U1; + const uint32_t y6 = y15 ^ U7; + const uint32_t y10 = y15 ^ t0; + const uint32_t y11 = y20 ^ y9; + const uint32_t y7 = U7 ^ y11; + const uint32_t y17 = y10 ^ y11; + const uint32_t y19 = y10 ^ y8; + const uint32_t y16 = t0 ^ y11; + const uint32_t y21 = y13 ^ y16; + const uint32_t y18 = U0 ^ y16; + const uint32_t t2 = y12 & y15; + const uint32_t t3 = y3 & y6; + const uint32_t t4 = t3 ^ t2; + const uint32_t t5 = y4 & U7; + const uint32_t t6 = t5 ^ t2; + const uint32_t t7 = y13 & y16; + const uint32_t t8 = y5 & y1; + const uint32_t t9 = t8 ^ t7; + const uint32_t t10 = y2 & y7; + const uint32_t t11 = t10 ^ t7; + const uint32_t t12 = y9 & y11; + const uint32_t t13 = y14 & y17; + const uint32_t t14 = t13 ^ t12; + const uint32_t t15 = y8 & y10; + const uint32_t t16 = t15 ^ t12; + const uint32_t t17 = t4 ^ y20; + const uint32_t t18 = t6 ^ t16; + const uint32_t t19 = t9 ^ t14; + const uint32_t t20 = t11 ^ t16; + const uint32_t t21 = t17 ^ t14; + const uint32_t t22 = t18 ^ y19; + const uint32_t t23 = t19 ^ y21; + const uint32_t t24 = t20 ^ y18; + const uint32_t t25 = t21 ^ t22; + const uint32_t t26 = t21 & t23; + const uint32_t t27 = t24 ^ t26; + const uint32_t t28 = t25 & t27; + const uint32_t t29 = t28 ^ t22; + const uint32_t t30 = t23 ^ t24; + const uint32_t t31 = t22 ^ t26; + 
const uint32_t t32 = t31 & t30; + const uint32_t t33 = t32 ^ t24; + const uint32_t t34 = t23 ^ t33; + const uint32_t t35 = t27 ^ t33; + const uint32_t t36 = t24 & t35; + const uint32_t t37 = t36 ^ t34; + const uint32_t t38 = t27 ^ t36; + const uint32_t t39 = t29 & t38; + const uint32_t t40 = t25 ^ t39; + const uint32_t t41 = t40 ^ t37; + const uint32_t t42 = t29 ^ t33; + const uint32_t t43 = t29 ^ t40; + const uint32_t t44 = t33 ^ t37; + const uint32_t t45 = t42 ^ t41; + const uint32_t z0 = t44 & y15; + const uint32_t z1 = t37 & y6; + const uint32_t z2 = t33 & U7; + const uint32_t z3 = t43 & y16; + const uint32_t z4 = t40 & y1; + const uint32_t z5 = t29 & y7; + const uint32_t z6 = t42 & y11; + const uint32_t z7 = t45 & y17; + const uint32_t z8 = t41 & y10; + const uint32_t z9 = t44 & y12; + const uint32_t z10 = t37 & y3; + const uint32_t z11 = t33 & y4; + const uint32_t z12 = t43 & y13; + const uint32_t z13 = t40 & y5; + const uint32_t z14 = t29 & y2; + const uint32_t z15 = t42 & y9; + const uint32_t z16 = t45 & y14; + const uint32_t z17 = t41 & y8; + const uint32_t tc1 = z15 ^ z16; + const uint32_t tc2 = z10 ^ tc1; + const uint32_t tc3 = z9 ^ tc2; + const uint32_t tc4 = z0 ^ z2; + const uint32_t tc5 = z1 ^ z0; + const uint32_t tc6 = z3 ^ z4; + const uint32_t tc7 = z12 ^ tc4; + const uint32_t tc8 = z7 ^ tc6; + const uint32_t tc9 = z8 ^ tc7; + const uint32_t tc10 = tc8 ^ tc9; + const uint32_t tc11 = tc6 ^ tc5; + const uint32_t tc12 = z3 ^ z5; + const uint32_t tc13 = z13 ^ tc1; + const uint32_t tc14 = tc4 ^ tc12; + const uint32_t S3 = tc3 ^ tc11; + const uint32_t tc16 = z6 ^ tc8; + const uint32_t tc17 = z14 ^ tc10; + const uint32_t tc18 = ~tc13 ^ tc14; + const uint32_t S7 = z12 ^ tc18; + const uint32_t tc20 = z15 ^ tc16; + const uint32_t tc21 = tc2 ^ z11; + const uint32_t S0 = tc3 ^ tc16; + const uint32_t S6 = tc10 ^ tc18; + const uint32_t S4 = tc14 ^ S3; + const uint32_t S1 = ~(S3 ^ tc16); + const uint32_t tc26 = tc17 ^ tc20; + const uint32_t S2 = ~(tc26 ^ z17); + 
const uint32_t S5 = tc21 ^ tc17; + + V[0] = S0; + V[1] = S1; + V[2] = S2; + V[3] = S3; + V[4] = S4; + V[5] = S5; + V[6] = S6; + V[7] = S7; + } + +/* +A circuit for inverse AES Sbox of size 121 and depth 21 from +http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html +http://www.cs.yale.edu/homes/peralta/CircuitStuff/Sinv.txt +*/ +void AES_INV_SBOX(uint32_t V[8]) + { + const uint32_t U0 = V[0]; + const uint32_t U1 = V[1]; + const uint32_t U2 = V[2]; + const uint32_t U3 = V[3]; + const uint32_t U4 = V[4]; + const uint32_t U5 = V[5]; + const uint32_t U6 = V[6]; + const uint32_t U7 = V[7]; + + const uint32_t Y0 = U0 ^ U3; + const uint32_t Y2 = ~(U1 ^ U3); + const uint32_t Y4 = U0 ^ Y2; + const uint32_t RTL0 = U6 ^ U7; + const uint32_t Y1 = Y2 ^ RTL0; + const uint32_t Y7 = ~(U2 ^ Y1); + const uint32_t RTL1 = U3 ^ U4; + const uint32_t Y6 = ~(U7 ^ RTL1); + const uint32_t Y3 = Y1 ^ RTL1; + const uint32_t RTL2 = ~(U0 ^ U2); + const uint32_t Y5 = U5 ^ RTL2; + const uint32_t sa1 = Y0 ^ Y2; + const uint32_t sa0 = Y1 ^ Y3; + const uint32_t sb1 = Y4 ^ Y6; + const uint32_t sb0 = Y5 ^ Y7; + const uint32_t ah = Y0 ^ Y1; + const uint32_t al = Y2 ^ Y3; + const uint32_t aa = sa0 ^ sa1; + const uint32_t bh = Y4 ^ Y5; + const uint32_t bl = Y6 ^ Y7; + const uint32_t bb = sb0 ^ sb1; + const uint32_t ab20 = sa0 ^ sb0; + const uint32_t ab22 = al ^ bl; + const uint32_t ab23 = Y3 ^ Y7; + const uint32_t ab21 = sa1 ^ sb1; + const uint32_t abcd1 = ah & bh; + const uint32_t rr1 = Y0 & Y4; + const uint32_t ph11 = ab20 ^ abcd1; + const uint32_t t01 = Y1 & Y5; + const uint32_t ph01 = t01 ^ abcd1; + const uint32_t abcd2 = al & bl; + const uint32_t r1 = Y2 & Y6; + const uint32_t pl11 = ab22 ^ abcd2; + const uint32_t r2 = Y3 & Y7; + const uint32_t pl01 = r2 ^ abcd2; + const uint32_t r3 = sa0 & sb0; + const uint32_t vr1 = aa & bb; + const uint32_t pr1 = vr1 ^ r3; + const uint32_t wr1 = sa1 & sb1; + const uint32_t qr1 = wr1 ^ r3; + const uint32_t ab0 = ph11 ^ rr1; + const uint32_t ab1 = ph01 ^ ab21; 
+ const uint32_t ab2 = pl11 ^ r1; + const uint32_t ab3 = pl01 ^ qr1; + const uint32_t cp1 = ab0 ^ pr1; + const uint32_t cp2 = ab1 ^ qr1; + const uint32_t cp3 = ab2 ^ pr1; + const uint32_t cp4 = ab3 ^ ab23; + const uint32_t tinv1 = cp3 ^ cp4; + const uint32_t tinv2 = cp3 & cp1; + const uint32_t tinv3 = cp2 ^ tinv2; + const uint32_t tinv4 = cp1 ^ cp2; + const uint32_t tinv5 = cp4 ^ tinv2; + const uint32_t tinv6 = tinv5 & tinv4; + const uint32_t tinv7 = tinv3 & tinv1; + const uint32_t d2 = cp4 ^ tinv7; + const uint32_t d0 = cp2 ^ tinv6; + const uint32_t tinv8 = cp1 & cp4; + const uint32_t tinv9 = tinv4 & tinv8; + const uint32_t tinv10 = tinv4 ^ tinv2; + const uint32_t d1 = tinv9 ^ tinv10; + const uint32_t tinv11 = cp2 & cp3; + const uint32_t tinv12 = tinv1 & tinv11; + const uint32_t tinv13 = tinv1 ^ tinv2; + const uint32_t d3 = tinv12 ^ tinv13; + const uint32_t sd1 = d1 ^ d3; + const uint32_t sd0 = d0 ^ d2; + const uint32_t dl = d0 ^ d1; + const uint32_t dh = d2 ^ d3; + const uint32_t dd = sd0 ^ sd1; + const uint32_t abcd3 = dh & bh; + const uint32_t rr2 = d3 & Y4; + const uint32_t t02 = d2 & Y5; + const uint32_t abcd4 = dl & bl; + const uint32_t r4 = d1 & Y6; + const uint32_t r5 = d0 & Y7; + const uint32_t r6 = sd0 & sb0; + const uint32_t vr2 = dd & bb; + const uint32_t wr2 = sd1 & sb1; + const uint32_t abcd5 = dh & ah; + const uint32_t r7 = d3 & Y0; + const uint32_t r8 = d2 & Y1; + const uint32_t abcd6 = dl & al; + const uint32_t r9 = d1 & Y2; + const uint32_t r10 = d0 & Y3; + const uint32_t r11 = sd0 & sa0; + const uint32_t vr3 = dd & aa; + const uint32_t wr3 = sd1 & sa1; + const uint32_t ph12 = rr2 ^ abcd3; + const uint32_t ph02 = t02 ^ abcd3; + const uint32_t pl12 = r4 ^ abcd4; + const uint32_t pl02 = r5 ^ abcd4; + const uint32_t pr2 = vr2 ^ r6; + const uint32_t qr2 = wr2 ^ r6; + const uint32_t p0 = ph12 ^ pr2; + const uint32_t p1 = ph02 ^ qr2; + const uint32_t p2 = pl12 ^ pr2; + const uint32_t p3 = pl02 ^ qr2; + const uint32_t ph13 = r7 ^ abcd5; + const uint32_t 
ph03 = r8 ^ abcd5; + const uint32_t pl13 = r9 ^ abcd6; + const uint32_t pl03 = r10 ^ abcd6; + const uint32_t pr3 = vr3 ^ r11; + const uint32_t qr3 = wr3 ^ r11; + const uint32_t p4 = ph13 ^ pr3; + const uint32_t S7 = ph03 ^ qr3; + const uint32_t p6 = pl13 ^ pr3; + const uint32_t p7 = pl03 ^ qr3; + const uint32_t S3 = p1 ^ p6; + const uint32_t S6 = p2 ^ p6; + const uint32_t S0 = p3 ^ p6; + const uint32_t X11 = p0 ^ p2; + const uint32_t S5 = S0 ^ X11; + const uint32_t X13 = p4 ^ p7; + const uint32_t X14 = X11 ^ X13; + const uint32_t S1 = S3 ^ X14; + const uint32_t X16 = p1 ^ S7; + const uint32_t S2 = X14 ^ X16; + const uint32_t X18 = p0 ^ p4; + const uint32_t X19 = S5 ^ X16; + const uint32_t S4 = X18 ^ X19; + + V[0] = S0; + V[1] = S1; + V[2] = S2; + V[3] = S3; + V[4] = S4; + V[5] = S5; + V[6] = S6; + V[7] = S7; + } + +inline void bit_transpose(uint32_t B[8]) + { + swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1); + swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1); + swap_bits<uint32_t>(B[5], B[4], 0x55555555, 1); + swap_bits<uint32_t>(B[7], B[6], 0x55555555, 1); + + swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2); + swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2); + swap_bits<uint32_t>(B[6], B[4], 0x33333333, 2); + swap_bits<uint32_t>(B[7], B[5], 0x33333333, 2); + + swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4); + swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4); + swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4); + swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4); + } + +inline void ks_expand(uint32_t B[8], const uint32_t K[], size_t r) + { + /* + This is bit_transpose of K[r..r+4] || K[r..r+4], we can save some computation + due to knowing the first and second halves are the same data. 
+ */ + for(size_t i = 0; i != 4; ++i) + B[i] = K[r + i]; + + swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1); + swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1); + + swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2); + swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2); + + B[4] = B[0]; + B[5] = B[1]; + B[6] = B[2]; + B[7] = B[3]; + + swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4); + swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4); + swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4); + swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4); + } + +inline void shift_rows(uint32_t B[8]) + { + // 3 0 1 2 7 4 5 6 10 11 8 9 14 15 12 13 17 18 19 16 21 22 23 20 24 25 26 27 28 29 30 31 +#if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) + for(size_t i = 0; i != 8; i += 2) + { + uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i+1]; + x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2); + x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1); + B[i] = static_cast<uint32_t>(x >> 32); + B[i+1] = static_cast<uint32_t>(x); + } +#else + for(size_t i = 0; i != 8; ++i) + { + uint32_t x = B[i]; + x = bit_permute_step<uint32_t>(x, 0x00223311, 2); + x = bit_permute_step<uint32_t>(x, 0x00550055, 1); + B[i] = x; + } +#endif + } + +inline void inv_shift_rows(uint32_t B[8]) + { + // Inverse of shift_rows, just inverting the steps + +#if defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) + for(size_t i = 0; i != 8; i += 2) + { + uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i+1]; + x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1); + x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2); + B[i] = static_cast<uint32_t>(x >> 32); + B[i+1] = static_cast<uint32_t>(x); + } +#else + for(size_t i = 0; i != 8; ++i) + { + uint32_t x = B[i]; + x = bit_permute_step<uint32_t>(x, 0x00550055, 1); + x = bit_permute_step<uint32_t>(x, 0x00223311, 2); + B[i] = x; + } +#endif + } + +inline void mix_columns(uint32_t B[8]) + { + // carry high bits in B[0] to positions in 0x1b == 0b11011 + const uint32_t X2[8] = { 
+ B[1], + B[2], + B[3], + B[4] ^ B[0], + B[5] ^ B[0], + B[6], + B[7] ^ B[0], + B[0], + }; + + for(size_t i = 0; i != 8; i++) + { + const uint32_t X3 = B[i] ^ X2[i]; + B[i] = X2[i] ^ rotr<8>(B[i]) ^ rotr<16>(B[i]) ^ rotr<24>(X3); + } + } + +void inv_mix_columns(uint32_t B[8]) + { + /* + OpenSSL's bsaes implementation credits Jussi Kivilinna with the lovely + matrix decomposition + + | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | + | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | + | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | + | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + + Notice the first component is simply the MixColumns matrix. So we can + multiply first by (05,00,04,00) then perform MixColumns to get the equivalent + of InvMixColumn. + */ + const uint32_t X4[8] = { + B[2], + B[3], + B[4] ^ B[0], + B[5] ^ B[0] ^ B[1], + B[6] ^ B[1], + B[7] ^ B[0], + B[0] ^ B[1], + B[1], + }; + + for(size_t i = 0; i != 8; i++) + { + const uint32_t X5 = X4[i] ^ B[i]; + B[i] = X5 ^ rotr<16>(X4[i]); + } + + mix_columns(B); + } + +/* +* AES Encryption +*/ +void aes_encrypt_n(const uint8_t in[], uint8_t out[], + size_t blocks, + const secure_vector<uint32_t>& EK) + { + BOTAN_ASSERT(EK.size() == 44 || EK.size() == 52 || EK.size() == 60, "Key was set"); + + const size_t rounds = (EK.size() - 4) / 4; + + uint32_t KS[13*8] = { 0 }; // actual maximum is (rounds - 1) * 8 + for(size_t i = 0; i < rounds - 1; i += 1) + { + ks_expand(&KS[8*i], EK.data(), 4*i + 4); + } + + const size_t BLOCK_SIZE = 16; + const size_t BITSLICED_BLOCKS = 8*sizeof(uint32_t) / BLOCK_SIZE; + + while(blocks > 0) + { + const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS); + + uint32_t B[8] = { 0 }; + + load_be(B, in, this_loop*4); + + for(size_t i = 0; i != 8; ++i) + B[i] ^= EK[i % 4]; + + bit_transpose(B); + + for(size_t r = 0; r != rounds - 1; ++r) + { + AES_SBOX(B); + shift_rows(B); + mix_columns(B); + + for(size_t i = 0; i != 8; ++i) + B[i] ^= KS[8*r + i]; + } + + // Final round: + AES_SBOX(B); + 
shift_rows(B); + bit_transpose(B); + + for(size_t i = 0; i != 8; ++i) + B[i] ^= EK[4*rounds + i % 4]; + + copy_out_be(out, this_loop*4*sizeof(uint32_t), B); + + in += this_loop * BLOCK_SIZE; + out += this_loop * BLOCK_SIZE; + blocks -= this_loop; + } + } + +/* +* AES Decryption +*/ +void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, + const secure_vector<uint32_t>& DK) + { + BOTAN_ASSERT(DK.size() == 44 || DK.size() == 52 || DK.size() == 60, "Key was set"); + + const size_t rounds = (DK.size() - 4) / 4; + + uint32_t KS[13*8] = { 0 }; // actual maximum is (rounds - 1) * 8 + for(size_t i = 0; i < rounds - 1; i += 1) + { + ks_expand(&KS[8*i], DK.data(), 4*i + 4); + } + + const size_t BLOCK_SIZE = 16; + const size_t BITSLICED_BLOCKS = 8*sizeof(uint32_t) / BLOCK_SIZE; + + while(blocks > 0) + { + const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS); + + uint32_t B[8] = { 0 }; + + load_be(B, in, this_loop*4); + + for(size_t i = 0; i != 8; ++i) + B[i] ^= DK[i % 4]; + + bit_transpose(B); + + for(size_t r = 0; r != rounds - 1; ++r) + { + AES_INV_SBOX(B); + inv_shift_rows(B); + inv_mix_columns(B); + + for(size_t i = 0; i != 8; ++i) + B[i] ^= KS[8*r + i]; + } + + // Final round: + AES_INV_SBOX(B); + inv_shift_rows(B); + bit_transpose(B); + + for(size_t i = 0; i != 8; ++i) + B[i] ^= DK[4*rounds + i % 4]; + + copy_out_be(out, this_loop*4*sizeof(uint32_t), B); + + in += this_loop * BLOCK_SIZE; + out += this_loop * BLOCK_SIZE; + blocks -= this_loop; + } + } + +inline uint32_t xtime32(uint32_t s) + { + const uint32_t lo_bit = 0x01010101; + const uint32_t mask = 0x7F7F7F7F; + const uint32_t poly = 0x1B; + + return ((s & mask) << 1) ^ (((s >> 7) & lo_bit) * poly); + } + +inline uint32_t InvMixColumn(uint32_t s1) + { + const uint32_t s2 = xtime32(s1); + const uint32_t s4 = xtime32(s2); + const uint32_t s8 = xtime32(s4); + const uint32_t s9 = s8 ^ s1; + const uint32_t s11 = s9 ^ s2; + const uint32_t s13 = s9 ^ s4; + const uint32_t s14 = s8 ^ s4 ^ s2; + + return 
s14 ^ rotr<8>(s9) ^ rotr<16>(s13) ^ rotr<24>(s11); + } + +void InvMixColumn_x4(uint32_t x[4]) + { + x[0] = InvMixColumn(x[0]); + x[1] = InvMixColumn(x[1]); + x[2] = InvMixColumn(x[2]); + x[3] = InvMixColumn(x[3]); + } + +uint32_t SE_word(uint32_t x) + { + uint32_t I[8] = { 0 }; + + for(size_t i = 0; i != 8; ++i) + I[i] = (x >> (7-i)) & 0x01010101; + + AES_SBOX(I); + + x = 0; + + for(size_t i = 0; i != 8; ++i) + x |= ((I[i] & 0x01010101) << (7-i)); + + return x; + } + +void aes_key_schedule(const uint8_t key[], size_t length, + secure_vector<uint32_t>& EK, + secure_vector<uint32_t>& DK, + bool bswap_keys = false) + { + static const uint32_t RC[10] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, + 0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000 }; + + const size_t X = length / 4; + + // Can't happen, but make static analyzers happy + BOTAN_ASSERT_NOMSG(X == 4 || X == 6 || X == 8); + + const size_t rounds = (length / 4) + 6; + + // Help the optimizer + BOTAN_ASSERT_NOMSG(rounds == 10 || rounds == 12 || rounds == 14); + + CT::poison(key, length); + + EK.resize(length + 28); + DK.resize(length + 28); + + for(size_t i = 0; i != X; ++i) + EK[i] = load_be<uint32_t>(key, i); + + for(size_t i = X; i < 4*(rounds+1); i += X) + { + EK[i] = EK[i-X] ^ RC[(i-X)/X] ^ rotl<8>(SE_word(EK[i-1])); + + for(size_t j = 1; j != X && (i+j) < EK.size(); ++j) + { + EK[i+j] = EK[i+j-X]; + + if(X == 8 && j == 4) + EK[i+j] ^= SE_word(EK[i+j-1]); + else + EK[i+j] ^= EK[i+j-1]; + } + } + + for(size_t i = 0; i != 4*(rounds+1); i += 4) + { + DK[i ] = EK[4*rounds - i ]; + DK[i+1] = EK[4*rounds - i+1]; + DK[i+2] = EK[4*rounds - i+2]; + DK[i+3] = EK[4*rounds - i+3]; + } + + for(size_t i = 4; i != 4*rounds; i += 4) + { + InvMixColumn_x4(&DK[i]); + } + + if(bswap_keys) + { + // HW AES on little endian needs the subkeys to be byte reversed + for(size_t i = 0; i != EK.size(); ++i) + EK[i] = reverse_bytes(EK[i]); + for(size_t i = 0; i != DK.size(); ++i) + DK[i] = 
reverse_bytes(DK[i]); + } + + CT::unpoison(EK.data(), EK.size()); + CT::unpoison(DK.data(), DK.size()); + CT::unpoison(key, length); + } + +size_t aes_parallelism() + { +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return 4; // pipelined + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return 2; // pipelined + } +#endif + + // bitsliced: + return 2; + } + +const char* aes_provider() + { +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return "cpu"; + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return "vperm"; + } +#endif + + return "base"; + } + +} + +std::string AES_128::provider() const { return aes_provider(); } +std::string AES_192::provider() const { return aes_provider(); } +std::string AES_256::provider() const { return aes_provider(); } + +size_t AES_128::parallelism() const { return aes_parallelism(); } +size_t AES_192::parallelism() const { return aes_parallelism(); } +size_t AES_256::parallelism() const { return aes_parallelism(); } + +void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return hw_aes_encrypt_n(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_encrypt_n(in, out, blocks); + } +#endif + + aes_encrypt_n(in, out, blocks, m_EK); + } + +void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return hw_aes_decrypt_n(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_decrypt_n(in, out, blocks); + } +#endif + + aes_decrypt_n(in, out, blocks, m_DK); + } + +void AES_128::key_schedule(const uint8_t key[], size_t length) + { +#if 
defined(BOTAN_HAS_AES_NI) + if(CPUID::has_aes_ni()) + { + return aesni_key_schedule(key, length); + } +#endif + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_key_schedule(key, length); + } +#endif + + aes_key_schedule(key, length, m_EK, m_DK); + } + +void AES_128::clear() + { + zap(m_EK); + zap(m_DK); + } + +void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return hw_aes_encrypt_n(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_encrypt_n(in, out, blocks); + } +#endif + + aes_encrypt_n(in, out, blocks, m_EK); + } + +void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return hw_aes_decrypt_n(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_decrypt_n(in, out, blocks); + } +#endif + + aes_decrypt_n(in, out, blocks, m_DK); + } + +void AES_192::key_schedule(const uint8_t key[], size_t length) + { +#if defined(BOTAN_HAS_AES_NI) + if(CPUID::has_aes_ni()) + { + return aesni_key_schedule(key, length); + } +#endif + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_key_schedule(key, length); + } +#endif + + aes_key_schedule(key, length, m_EK, m_DK); + } + +void AES_192::clear() + { + zap(m_EK); + zap(m_DK); + } + +void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) 
const + { + verify_key_set(m_EK.empty() == false); + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return hw_aes_encrypt_n(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_encrypt_n(in, out, blocks); + } +#endif + + aes_encrypt_n(in, out, blocks, m_EK); + } + +void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return hw_aes_decrypt_n(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_decrypt_n(in, out, blocks); + } +#endif + + aes_decrypt_n(in, out, blocks, m_DK); + } + +void AES_256::key_schedule(const uint8_t key[], size_t length) + { +#if defined(BOTAN_HAS_AES_NI) + if(CPUID::has_aes_ni()) + { + return aesni_key_schedule(key, length); + } +#endif + +#if defined(BOTAN_HAS_HW_AES_SUPPORT) + if(CPUID::has_hw_aes()) + { + return aes_key_schedule(key, length, m_EK, m_DK, CPUID::is_little_endian()); + } +#endif + +#if defined(BOTAN_HAS_AES_VPERM) + if(CPUID::has_vperm()) + { + return vperm_key_schedule(key, length); + } +#endif + + aes_key_schedule(key, length, m_EK, m_DK); + } + +void AES_256::clear() + { + zap(m_EK); + zap(m_DK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/aes/aes.h b/comm/third_party/botan/src/lib/block/aes/aes.h new file mode 100644 index 0000000000..76248200d4 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes.h @@ -0,0 +1,131 @@ +/* +* AES +* (C) 1999-2010 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_AES_H_ +#define BOTAN_AES_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(aes.h) + +namespace Botan { + +/** +* AES-128 +*/ +class BOTAN_PUBLIC_API(2,0) AES_128 final : public Block_Cipher_Fixed_Params<16, 16> + { + public: + void encrypt_n(const 
uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + + std::string provider() const override; + std::string name() const override { return "AES-128"; } + BlockCipher* clone() const override { return new AES_128; } + size_t parallelism() const override; + + private: + void key_schedule(const uint8_t key[], size_t length) override; + +#if defined(BOTAN_HAS_AES_VPERM) + void vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void vperm_key_schedule(const uint8_t key[], size_t length); +#endif + +#if defined(BOTAN_HAS_AES_NI) + void aesni_key_schedule(const uint8_t key[], size_t length); +#endif + +#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI) + void hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + secure_vector<uint32_t> m_EK, m_DK; + }; + +/** +* AES-192 +*/ +class BOTAN_PUBLIC_API(2,0) AES_192 final : public Block_Cipher_Fixed_Params<16, 24> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + + std::string provider() const override; + std::string name() const override { return "AES-192"; } + BlockCipher* clone() const override { return new AES_192; } + size_t parallelism() const override; + + private: +#if defined(BOTAN_HAS_AES_VPERM) + void vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void vperm_key_schedule(const uint8_t key[], size_t length); +#endif + +#if defined(BOTAN_HAS_AES_NI) + void aesni_key_schedule(const 
uint8_t key[], size_t length); +#endif + +#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI) + void hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + void key_schedule(const uint8_t key[], size_t length) override; + + secure_vector<uint32_t> m_EK, m_DK; + }; + +/** +* AES-256 +*/ +class BOTAN_PUBLIC_API(2,0) AES_256 final : public Block_Cipher_Fixed_Params<16, 32> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + + std::string provider() const override; + + std::string name() const override { return "AES-256"; } + BlockCipher* clone() const override { return new AES_256; } + size_t parallelism() const override; + + private: +#if defined(BOTAN_HAS_AES_VPERM) + void vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void vperm_key_schedule(const uint8_t key[], size_t length); +#endif + +#if defined(BOTAN_HAS_AES_NI) + void aesni_key_schedule(const uint8_t key[], size_t length); +#endif + +#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI) + void hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + void key_schedule(const uint8_t key[], size_t length) override; + + secure_vector<uint32_t> m_EK, m_DK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp b/comm/third_party/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp new file mode 100644 index 0000000000..9766bf88c9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes_armv8/aes_armv8.cpp @@ 
-0,0 +1,484 @@ +/* +* AES using ARMv8 +* Contributed by Jeffrey Walton +* +* Further changes +* (C) 2017,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/loadstor.h> +#include <arm_neon.h> + +namespace Botan { + +/* +* Helper macros operating on four blocks B0..B3, which must be declared at +* the expansion site. The *_LAST_ROUNDS forms omit vaesmcq_u8/vaesimcq_u8 +* and XOR the result with a second key K2. +*/ +#define AES_ENC_4_ROUNDS(K) \ + do \ + { \ + B0 = vaesmcq_u8(vaeseq_u8(B0, K)); \ + B1 = vaesmcq_u8(vaeseq_u8(B1, K)); \ + B2 = vaesmcq_u8(vaeseq_u8(B2, K)); \ + B3 = vaesmcq_u8(vaeseq_u8(B3, K)); \ + } while(0) + +#define AES_ENC_4_LAST_ROUNDS(K, K2) \ + do \ + { \ + B0 = veorq_u8(vaeseq_u8(B0, K), K2); \ + B1 = veorq_u8(vaeseq_u8(B1, K), K2); \ + B2 = veorq_u8(vaeseq_u8(B2, K), K2); \ + B3 = veorq_u8(vaeseq_u8(B3, K), K2); \ + } while(0) + +#define AES_DEC_4_ROUNDS(K) \ + do \ + { \ + B0 = vaesimcq_u8(vaesdq_u8(B0, K)); \ + B1 = vaesimcq_u8(vaesdq_u8(B1, K)); \ + B2 = vaesimcq_u8(vaesdq_u8(B2, K)); \ + B3 = vaesimcq_u8(vaesdq_u8(B3, K)); \ + } while(0) + +#define AES_DEC_4_LAST_ROUNDS(K, K2) \ + do \ + { \ + B0 = veorq_u8(vaesdq_u8(B0, K), K2); \ + B1 = veorq_u8(vaesdq_u8(B1, K), K2); \ + B2 = veorq_u8(vaesdq_u8(B2, K), K2); \ + B3 = veorq_u8(vaesdq_u8(B3, K), K2); \ + } while(0) + +/* +* AES-128 Encryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 11 round keys +* loaded from m_EK: four blocks per iteration while at least four remain, +* then the remainder one block at a time. +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0*16); + const uint8x16_t K1 = vld1q_u8(skey + 1*16); + const uint8x16_t K2 = vld1q_u8(skey + 2*16); + const uint8x16_t K3 = vld1q_u8(skey + 3*16); + const uint8x16_t K4 = vld1q_u8(skey + 4*16); + const uint8x16_t K5 = vld1q_u8(skey + 5*16); + const uint8x16_t K6 = vld1q_u8(skey + 6*16); + const uint8x16_t K7 = vld1q_u8(skey + 7*16); + const uint8x16_t K8 = vld1q_u8(skey + 8*16); + const uint8x16_t K9 = vld1q_u8(skey + 9*16); + const uint8x16_t K10 = vld1q_u8(skey + 10*16); + + while(blocks >= 4) + { + uint8x16_t B0 = vld1q_u8(in); 
+ uint8x16_t B1 = vld1q_u8(in+16); + uint8x16_t B2 = vld1q_u8(in+32); + uint8x16_t B3 = vld1q_u8(in+48); + + AES_ENC_4_ROUNDS(K0); + AES_ENC_4_ROUNDS(K1); + AES_ENC_4_ROUNDS(K2); + AES_ENC_4_ROUNDS(K3); + AES_ENC_4_ROUNDS(K4); + AES_ENC_4_ROUNDS(K5); + AES_ENC_4_ROUNDS(K6); + AES_ENC_4_ROUNDS(K7); + AES_ENC_4_ROUNDS(K8); + AES_ENC_4_LAST_ROUNDS(K9, K10); + + vst1q_u8(out, B0); + vst1q_u8(out+16, B1); + vst1q_u8(out+32, B2); + vst1q_u8(out+48, B3); + + in += 16*4; + out += 16*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t B = vld1q_u8(in+16*i); + B = vaesmcq_u8(vaeseq_u8(B, K0)); + B = vaesmcq_u8(vaeseq_u8(B, K1)); + B = vaesmcq_u8(vaeseq_u8(B, K2)); + B = vaesmcq_u8(vaeseq_u8(B, K3)); + B = vaesmcq_u8(vaeseq_u8(B, K4)); + B = vaesmcq_u8(vaeseq_u8(B, K5)); + B = vaesmcq_u8(vaeseq_u8(B, K6)); + B = vaesmcq_u8(vaeseq_u8(B, K7)); + B = vaesmcq_u8(vaeseq_u8(B, K8)); + B = veorq_u8(vaeseq_u8(B, K9), K10); + vst1q_u8(out+16*i, B); + } + } + +/* +* AES-128 Decryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 11 round keys +* loaded from m_DK: four blocks per iteration while at least four remain, +* then the remainder one block at a time. +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0*16); + const uint8x16_t K1 = vld1q_u8(skey + 1*16); + const uint8x16_t K2 = vld1q_u8(skey + 2*16); + const uint8x16_t K3 = vld1q_u8(skey + 3*16); + const uint8x16_t K4 = vld1q_u8(skey + 4*16); + const uint8x16_t K5 = vld1q_u8(skey + 5*16); + const uint8x16_t K6 = vld1q_u8(skey + 6*16); + const uint8x16_t K7 = vld1q_u8(skey + 7*16); + const uint8x16_t K8 = vld1q_u8(skey + 8*16); + const uint8x16_t K9 = vld1q_u8(skey + 9*16); + const uint8x16_t K10 = vld1q_u8(skey + 10*16); + + while(blocks >= 4) + { + uint8x16_t B0 = vld1q_u8(in); + uint8x16_t B1 = vld1q_u8(in+16); + uint8x16_t B2 = vld1q_u8(in+32); + uint8x16_t B3 = vld1q_u8(in+48); + + AES_DEC_4_ROUNDS(K0); + AES_DEC_4_ROUNDS(K1); + AES_DEC_4_ROUNDS(K2); + AES_DEC_4_ROUNDS(K3); + 
AES_DEC_4_ROUNDS(K4); + AES_DEC_4_ROUNDS(K5); + AES_DEC_4_ROUNDS(K6); + AES_DEC_4_ROUNDS(K7); + AES_DEC_4_ROUNDS(K8); + AES_DEC_4_LAST_ROUNDS(K9, K10); + + vst1q_u8(out, B0); + vst1q_u8(out+16, B1); + vst1q_u8(out+32, B2); + vst1q_u8(out+48, B3); + + in += 16*4; + out += 16*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t B = vld1q_u8(in+16*i); + B = vaesimcq_u8(vaesdq_u8(B, K0)); + B = vaesimcq_u8(vaesdq_u8(B, K1)); + B = vaesimcq_u8(vaesdq_u8(B, K2)); + B = vaesimcq_u8(vaesdq_u8(B, K3)); + B = vaesimcq_u8(vaesdq_u8(B, K4)); + B = vaesimcq_u8(vaesdq_u8(B, K5)); + B = vaesimcq_u8(vaesdq_u8(B, K6)); + B = vaesimcq_u8(vaesdq_u8(B, K7)); + B = vaesimcq_u8(vaesdq_u8(B, K8)); + B = veorq_u8(vaesdq_u8(B, K9), K10); + vst1q_u8(out+16*i, B); + } + } + +/* +* AES-192 Encryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 13 round keys +* loaded from m_EK: four blocks per iteration while at least four remain, +* then the remainder one block at a time. +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0*16); + const uint8x16_t K1 = vld1q_u8(skey + 1*16); + const uint8x16_t K2 = vld1q_u8(skey + 2*16); + const uint8x16_t K3 = vld1q_u8(skey + 3*16); + const uint8x16_t K4 = vld1q_u8(skey + 4*16); + const uint8x16_t K5 = vld1q_u8(skey + 5*16); + const uint8x16_t K6 = vld1q_u8(skey + 6*16); + const uint8x16_t K7 = vld1q_u8(skey + 7*16); + const uint8x16_t K8 = vld1q_u8(skey + 8*16); + const uint8x16_t K9 = vld1q_u8(skey + 9*16); + const uint8x16_t K10 = vld1q_u8(skey + 10*16); + const uint8x16_t K11 = vld1q_u8(skey + 11*16); + const uint8x16_t K12 = vld1q_u8(skey + 12*16); + + while(blocks >= 4) + { + uint8x16_t B0 = vld1q_u8(in); + uint8x16_t B1 = vld1q_u8(in+16); + uint8x16_t B2 = vld1q_u8(in+32); + uint8x16_t B3 = vld1q_u8(in+48); + + AES_ENC_4_ROUNDS(K0); + AES_ENC_4_ROUNDS(K1); + AES_ENC_4_ROUNDS(K2); + AES_ENC_4_ROUNDS(K3); + AES_ENC_4_ROUNDS(K4); + AES_ENC_4_ROUNDS(K5); + AES_ENC_4_ROUNDS(K6); + AES_ENC_4_ROUNDS(K7); + 
AES_ENC_4_ROUNDS(K8); + AES_ENC_4_ROUNDS(K9); + AES_ENC_4_ROUNDS(K10); + AES_ENC_4_LAST_ROUNDS(K11, K12); + + vst1q_u8(out, B0); + vst1q_u8(out+16, B1); + vst1q_u8(out+32, B2); + vst1q_u8(out+48, B3); + + in += 16*4; + out += 16*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t B = vld1q_u8(in+16*i); + B = vaesmcq_u8(vaeseq_u8(B, K0)); + B = vaesmcq_u8(vaeseq_u8(B, K1)); + B = vaesmcq_u8(vaeseq_u8(B, K2)); + B = vaesmcq_u8(vaeseq_u8(B, K3)); + B = vaesmcq_u8(vaeseq_u8(B, K4)); + B = vaesmcq_u8(vaeseq_u8(B, K5)); + B = vaesmcq_u8(vaeseq_u8(B, K6)); + B = vaesmcq_u8(vaeseq_u8(B, K7)); + B = vaesmcq_u8(vaeseq_u8(B, K8)); + B = vaesmcq_u8(vaeseq_u8(B, K9)); + B = vaesmcq_u8(vaeseq_u8(B, K10)); + B = veorq_u8(vaeseq_u8(B, K11), K12); + vst1q_u8(out+16*i, B); + } + } + +/* +* AES-192 Decryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 13 round keys +* loaded from m_DK: four blocks per iteration while at least four remain, +* then the remainder one block at a time. +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0*16); + const uint8x16_t K1 = vld1q_u8(skey + 1*16); + const uint8x16_t K2 = vld1q_u8(skey + 2*16); + const uint8x16_t K3 = vld1q_u8(skey + 3*16); + const uint8x16_t K4 = vld1q_u8(skey + 4*16); + const uint8x16_t K5 = vld1q_u8(skey + 5*16); + const uint8x16_t K6 = vld1q_u8(skey + 6*16); + const uint8x16_t K7 = vld1q_u8(skey + 7*16); + const uint8x16_t K8 = vld1q_u8(skey + 8*16); + const uint8x16_t K9 = vld1q_u8(skey + 9*16); + const uint8x16_t K10 = vld1q_u8(skey + 10*16); + const uint8x16_t K11 = vld1q_u8(skey + 11*16); + const uint8x16_t K12 = vld1q_u8(skey + 12*16); + + while(blocks >= 4) + { + uint8x16_t B0 = vld1q_u8(in); + uint8x16_t B1 = vld1q_u8(in+16); + uint8x16_t B2 = vld1q_u8(in+32); + uint8x16_t B3 = vld1q_u8(in+48); + + AES_DEC_4_ROUNDS(K0); + AES_DEC_4_ROUNDS(K1); + AES_DEC_4_ROUNDS(K2); + AES_DEC_4_ROUNDS(K3); + AES_DEC_4_ROUNDS(K4); + AES_DEC_4_ROUNDS(K5); + AES_DEC_4_ROUNDS(K6); + AES_DEC_4_ROUNDS(K7); 
+ AES_DEC_4_ROUNDS(K8); + AES_DEC_4_ROUNDS(K9); + AES_DEC_4_ROUNDS(K10); + AES_DEC_4_LAST_ROUNDS(K11, K12); + + vst1q_u8(out, B0); + vst1q_u8(out+16, B1); + vst1q_u8(out+32, B2); + vst1q_u8(out+48, B3); + + in += 16*4; + out += 16*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t B = vld1q_u8(in+16*i); + B = vaesimcq_u8(vaesdq_u8(B, K0)); + B = vaesimcq_u8(vaesdq_u8(B, K1)); + B = vaesimcq_u8(vaesdq_u8(B, K2)); + B = vaesimcq_u8(vaesdq_u8(B, K3)); + B = vaesimcq_u8(vaesdq_u8(B, K4)); + B = vaesimcq_u8(vaesdq_u8(B, K5)); + B = vaesimcq_u8(vaesdq_u8(B, K6)); + B = vaesimcq_u8(vaesdq_u8(B, K7)); + B = vaesimcq_u8(vaesdq_u8(B, K8)); + B = vaesimcq_u8(vaesdq_u8(B, K9)); + B = vaesimcq_u8(vaesdq_u8(B, K10)); + B = veorq_u8(vaesdq_u8(B, K11), K12); + vst1q_u8(out+16*i, B); + } + } + +/* +* AES-256 Encryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 15 round keys +* loaded from m_EK: four blocks per iteration while at least four remain, +* then the remainder one block at a time. +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0*16); + const uint8x16_t K1 = vld1q_u8(skey + 1*16); + const uint8x16_t K2 = vld1q_u8(skey + 2*16); + const uint8x16_t K3 = vld1q_u8(skey + 3*16); + const uint8x16_t K4 = vld1q_u8(skey + 4*16); + const uint8x16_t K5 = vld1q_u8(skey + 5*16); + const uint8x16_t K6 = vld1q_u8(skey + 6*16); + const uint8x16_t K7 = vld1q_u8(skey + 7*16); + const uint8x16_t K8 = vld1q_u8(skey + 8*16); + const uint8x16_t K9 = vld1q_u8(skey + 9*16); + const uint8x16_t K10 = vld1q_u8(skey + 10*16); + const uint8x16_t K11 = vld1q_u8(skey + 11*16); + const uint8x16_t K12 = vld1q_u8(skey + 12*16); + const uint8x16_t K13 = vld1q_u8(skey + 13*16); + const uint8x16_t K14 = vld1q_u8(skey + 14*16); + + while(blocks >= 4) + { + uint8x16_t B0 = vld1q_u8(in); + uint8x16_t B1 = vld1q_u8(in+16); + uint8x16_t B2 = vld1q_u8(in+32); + uint8x16_t B3 = vld1q_u8(in+48); + + AES_ENC_4_ROUNDS(K0); + AES_ENC_4_ROUNDS(K1); + AES_ENC_4_ROUNDS(K2); + 
AES_ENC_4_ROUNDS(K3); + AES_ENC_4_ROUNDS(K4); + AES_ENC_4_ROUNDS(K5); + AES_ENC_4_ROUNDS(K6); + AES_ENC_4_ROUNDS(K7); + AES_ENC_4_ROUNDS(K8); + AES_ENC_4_ROUNDS(K9); + AES_ENC_4_ROUNDS(K10); + AES_ENC_4_ROUNDS(K11); + AES_ENC_4_ROUNDS(K12); + AES_ENC_4_LAST_ROUNDS(K13, K14); + + vst1q_u8(out, B0); + vst1q_u8(out+16, B1); + vst1q_u8(out+32, B2); + vst1q_u8(out+48, B3); + + in += 16*4; + out += 16*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t B = vld1q_u8(in+16*i); + B = vaesmcq_u8(vaeseq_u8(B, K0)); + B = vaesmcq_u8(vaeseq_u8(B, K1)); + B = vaesmcq_u8(vaeseq_u8(B, K2)); + B = vaesmcq_u8(vaeseq_u8(B, K3)); + B = vaesmcq_u8(vaeseq_u8(B, K4)); + B = vaesmcq_u8(vaeseq_u8(B, K5)); + B = vaesmcq_u8(vaeseq_u8(B, K6)); + B = vaesmcq_u8(vaeseq_u8(B, K7)); + B = vaesmcq_u8(vaeseq_u8(B, K8)); + B = vaesmcq_u8(vaeseq_u8(B, K9)); + B = vaesmcq_u8(vaeseq_u8(B, K10)); + B = vaesmcq_u8(vaeseq_u8(B, K11)); + B = vaesmcq_u8(vaeseq_u8(B, K12)); + B = veorq_u8(vaeseq_u8(B, K13), K14); + vst1q_u8(out+16*i, B); + } + } + +/* +* AES-256 Decryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 15 round keys +* loaded from m_DK: four blocks per iteration while at least four remain, +* then the remainder one block at a time. +*/ +BOTAN_FUNC_ISA("+crypto") +void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data()); + + const uint8x16_t K0 = vld1q_u8(skey + 0*16); + const uint8x16_t K1 = vld1q_u8(skey + 1*16); + const uint8x16_t K2 = vld1q_u8(skey + 2*16); + const uint8x16_t K3 = vld1q_u8(skey + 3*16); + const uint8x16_t K4 = vld1q_u8(skey + 4*16); + const uint8x16_t K5 = vld1q_u8(skey + 5*16); + const uint8x16_t K6 = vld1q_u8(skey + 6*16); + const uint8x16_t K7 = vld1q_u8(skey + 7*16); + const uint8x16_t K8 = vld1q_u8(skey + 8*16); + const uint8x16_t K9 = vld1q_u8(skey + 9*16); + const uint8x16_t K10 = vld1q_u8(skey + 10*16); + const uint8x16_t K11 = vld1q_u8(skey + 11*16); + const uint8x16_t K12 = vld1q_u8(skey + 12*16); + const uint8x16_t K13 = vld1q_u8(skey + 13*16); + const uint8x16_t K14 = vld1q_u8(skey + 14*16); + + 
while(blocks >= 4) + { + uint8x16_t B0 = vld1q_u8(in); + uint8x16_t B1 = vld1q_u8(in+16); + uint8x16_t B2 = vld1q_u8(in+32); + uint8x16_t B3 = vld1q_u8(in+48); + + AES_DEC_4_ROUNDS(K0); + AES_DEC_4_ROUNDS(K1); + AES_DEC_4_ROUNDS(K2); + AES_DEC_4_ROUNDS(K3); + AES_DEC_4_ROUNDS(K4); + AES_DEC_4_ROUNDS(K5); + AES_DEC_4_ROUNDS(K6); + AES_DEC_4_ROUNDS(K7); + AES_DEC_4_ROUNDS(K8); + AES_DEC_4_ROUNDS(K9); + AES_DEC_4_ROUNDS(K10); + AES_DEC_4_ROUNDS(K11); + AES_DEC_4_ROUNDS(K12); + AES_DEC_4_LAST_ROUNDS(K13, K14); + + vst1q_u8(out, B0); + vst1q_u8(out+16, B1); + vst1q_u8(out+32, B2); + vst1q_u8(out+48, B3); + + in += 16*4; + out += 16*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint8x16_t B = vld1q_u8(in+16*i); + B = vaesimcq_u8(vaesdq_u8(B, K0)); + B = vaesimcq_u8(vaesdq_u8(B, K1)); + B = vaesimcq_u8(vaesdq_u8(B, K2)); + B = vaesimcq_u8(vaesdq_u8(B, K3)); + B = vaesimcq_u8(vaesdq_u8(B, K4)); + B = vaesimcq_u8(vaesdq_u8(B, K5)); + B = vaesimcq_u8(vaesdq_u8(B, K6)); + B = vaesimcq_u8(vaesdq_u8(B, K7)); + B = vaesimcq_u8(vaesdq_u8(B, K8)); + B = vaesimcq_u8(vaesdq_u8(B, K9)); + B = vaesimcq_u8(vaesdq_u8(B, K10)); + B = vaesimcq_u8(vaesdq_u8(B, K11)); + B = vaesimcq_u8(vaesdq_u8(B, K12)); + B = veorq_u8(vaesdq_u8(B, K13), K14); + vst1q_u8(out+16*i, B); + } + } + +#undef AES_ENC_4_ROUNDS +#undef AES_ENC_4_LAST_ROUNDS +#undef AES_DEC_4_ROUNDS +#undef AES_DEC_4_LAST_ROUNDS + +} diff --git a/comm/third_party/botan/src/lib/block/aes/aes_armv8/info.txt b/comm/third_party/botan/src/lib/block/aes/aes_armv8/info.txt new file mode 100644 index 0000000000..1864f215b4 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes_armv8/info.txt @@ -0,0 +1,12 @@ +<defines> +AES_ARMV8 -> 20170903 +</defines> + +<isa> +armv8crypto +</isa> + +<cc> +gcc:5 +clang:3.8 +</cc> diff --git a/comm/third_party/botan/src/lib/block/aes/aes_ni/aes_ni.cpp b/comm/third_party/botan/src/lib/block/aes/aes_ni/aes_ni.cpp new file mode 100644 index 0000000000..76c695f32c --- /dev/null 
+++ b/comm/third_party/botan/src/lib/block/aes/aes_ni/aes_ni.cpp @@ -0,0 +1,780 @@ +/* +* AES using AES-NI instructions +* (C) 2009,2012 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/loadstor.h> +#include <wmmintrin.h> + +namespace Botan { + +namespace { + +/* +* One AES-128 key schedule step: the three shift-by-4-bytes-and-XOR passes +* leave each 32-bit word of key XORed with all lower words; the result is +* then XORed with 32-bit lane 3 of key_with_rcon broadcast to every lane. +*/ +BOTAN_FUNC_ISA("ssse3") +__m128i aes_128_key_expansion(__m128i key, __m128i key_with_rcon) + { + key_with_rcon = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(3,3,3,3)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + return _mm_xor_si128(key, key_with_rcon); + } + +/* +* One AES-192 key schedule step. Updates *K1 and stores it to out[0..3]; +* unless last is set, also updates *K2 and stores its two low 32-bit words +* to out[4] and out[5]. +*/ +BOTAN_FUNC_ISA("ssse3") +void aes_192_key_expansion(__m128i* K1, __m128i* K2, __m128i key2_with_rcon, + uint32_t out[], bool last) + { + __m128i key1 = *K1; + __m128i key2 = *K2; + + key2_with_rcon = _mm_shuffle_epi32(key2_with_rcon, _MM_SHUFFLE(1,1,1,1)); + key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4)); + key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4)); + key1 = _mm_xor_si128(key1, _mm_slli_si128(key1, 4)); + key1 = _mm_xor_si128(key1, key2_with_rcon); + + *K1 = key1; + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), key1); + + if(last) + return; + + key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4)); + key2 = _mm_xor_si128(key2, _mm_shuffle_epi32(key1, _MM_SHUFFLE(3,3,3,3))); + + *K2 = key2; + out[4] = _mm_cvtsi128_si32(key2); + out[5] = _mm_cvtsi128_si32(_mm_srli_si128(key2, 4)); + } + +/* +* The second half of the AES-256 key expansion (other half same as AES-128) +* +* Uses _mm_aeskeygenassist_si128(key2, 0x00), broadcasts its 32-bit lane 2, +* and XORs that into the word-wise running XOR of key. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +__m128i aes_256_key_expansion(__m128i key, __m128i key2) + { + __m128i key_with_rcon = _mm_aeskeygenassist_si128(key2, 0x00); + key_with_rcon = _mm_shuffle_epi32(key_with_rcon, _MM_SHUFFLE(2,2,2,2)); + + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, 
_mm_slli_si128(key, 4)); + return _mm_xor_si128(key, key_with_rcon); + } + +} + +/* +* Helper macros applying one AES-NI round instruction with key K to the four +* blocks B0..B3, which must be declared at the expansion site. +*/ +#define AES_ENC_4_ROUNDS(K) \ + do \ + { \ + B0 = _mm_aesenc_si128(B0, K); \ + B1 = _mm_aesenc_si128(B1, K); \ + B2 = _mm_aesenc_si128(B2, K); \ + B3 = _mm_aesenc_si128(B3, K); \ + } while(0) + +#define AES_ENC_4_LAST_ROUNDS(K) \ + do \ + { \ + B0 = _mm_aesenclast_si128(B0, K); \ + B1 = _mm_aesenclast_si128(B1, K); \ + B2 = _mm_aesenclast_si128(B2, K); \ + B3 = _mm_aesenclast_si128(B3, K); \ + } while(0) + +#define AES_DEC_4_ROUNDS(K) \ + do \ + { \ + B0 = _mm_aesdec_si128(B0, K); \ + B1 = _mm_aesdec_si128(B1, K); \ + B2 = _mm_aesdec_si128(B2, K); \ + B3 = _mm_aesdec_si128(B3, K); \ + } while(0) + +#define AES_DEC_4_LAST_ROUNDS(K) \ + do \ + { \ + B0 = _mm_aesdeclast_si128(B0, K); \ + B1 = _mm_aesdeclast_si128(B1, K); \ + B2 = _mm_aesdeclast_si128(B2, K); \ + B3 = _mm_aesdeclast_si128(B3, K); \ + } while(0) + +/* +* AES-128 Encryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 11 round keys +* loaded from m_EK. Each block is XORed with K0 before the round instructions; +* four blocks are handled per iteration while at least four remain, then the +* remainder one block at a time. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_EK.data()); + + const __m128i K0 = _mm_loadu_si128(key_mm); + const __m128i K1 = _mm_loadu_si128(key_mm + 1); + const __m128i K2 = _mm_loadu_si128(key_mm + 2); + const __m128i K3 = _mm_loadu_si128(key_mm + 3); + const __m128i K4 = _mm_loadu_si128(key_mm + 4); + const __m128i K5 = _mm_loadu_si128(key_mm + 5); + const __m128i K6 = _mm_loadu_si128(key_mm + 6); + const __m128i K7 = _mm_loadu_si128(key_mm + 7); + const __m128i K8 = _mm_loadu_si128(key_mm + 8); + const __m128i K9 = _mm_loadu_si128(key_mm + 9); + const __m128i K10 = _mm_loadu_si128(key_mm + 10); + + while(blocks >= 4) + { + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + B0 
= _mm_xor_si128(B0, K0); + B1 = _mm_xor_si128(B1, K0); + B2 = _mm_xor_si128(B2, K0); + B3 = _mm_xor_si128(B3, K0); + + AES_ENC_4_ROUNDS(K1); + AES_ENC_4_ROUNDS(K2); + AES_ENC_4_ROUNDS(K3); + AES_ENC_4_ROUNDS(K4); + AES_ENC_4_ROUNDS(K5); + AES_ENC_4_ROUNDS(K6); + AES_ENC_4_ROUNDS(K7); + AES_ENC_4_ROUNDS(K8); + AES_ENC_4_ROUNDS(K9); + AES_ENC_4_LAST_ROUNDS(K10); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + + blocks -= 4; + in_mm += 4; + out_mm += 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesenc_si128(B, K1); + B = _mm_aesenc_si128(B, K2); + B = _mm_aesenc_si128(B, K3); + B = _mm_aesenc_si128(B, K4); + B = _mm_aesenc_si128(B, K5); + B = _mm_aesenc_si128(B, K6); + B = _mm_aesenc_si128(B, K7); + B = _mm_aesenc_si128(B, K8); + B = _mm_aesenc_si128(B, K9); + B = _mm_aesenclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); + } + } + +/* +* AES-128 Decryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 11 round keys +* loaded from m_DK. Each block is XORed with K0 before the round instructions; +* four blocks are handled per iteration while at least four remain, then the +* remainder one block at a time. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_DK.data()); + + const __m128i K0 = _mm_loadu_si128(key_mm); + const __m128i K1 = _mm_loadu_si128(key_mm + 1); + const __m128i K2 = _mm_loadu_si128(key_mm + 2); + const __m128i K3 = _mm_loadu_si128(key_mm + 3); + const __m128i K4 = _mm_loadu_si128(key_mm + 4); + const __m128i K5 = _mm_loadu_si128(key_mm + 5); + const __m128i K6 = _mm_loadu_si128(key_mm + 6); + const __m128i K7 = _mm_loadu_si128(key_mm + 7); + const __m128i K8 = _mm_loadu_si128(key_mm + 8); + const __m128i K9 = _mm_loadu_si128(key_mm + 9); + const __m128i K10 = _mm_loadu_si128(key_mm + 10); + + while(blocks >= 4) + { + __m128i B0 = _mm_loadu_si128(in_mm + 
0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + B0 = _mm_xor_si128(B0, K0); + B1 = _mm_xor_si128(B1, K0); + B2 = _mm_xor_si128(B2, K0); + B3 = _mm_xor_si128(B3, K0); + + AES_DEC_4_ROUNDS(K1); + AES_DEC_4_ROUNDS(K2); + AES_DEC_4_ROUNDS(K3); + AES_DEC_4_ROUNDS(K4); + AES_DEC_4_ROUNDS(K5); + AES_DEC_4_ROUNDS(K6); + AES_DEC_4_ROUNDS(K7); + AES_DEC_4_ROUNDS(K8); + AES_DEC_4_ROUNDS(K9); + AES_DEC_4_LAST_ROUNDS(K10); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + + blocks -= 4; + in_mm += 4; + out_mm += 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesdec_si128(B, K1); + B = _mm_aesdec_si128(B, K2); + B = _mm_aesdec_si128(B, K3); + B = _mm_aesdec_si128(B, K4); + B = _mm_aesdec_si128(B, K5); + B = _mm_aesdec_si128(B, K6); + B = _mm_aesdec_si128(B, K7); + B = _mm_aesdec_si128(B, K8); + B = _mm_aesdec_si128(B, K9); + B = _mm_aesdeclast_si128(B, K10); + + _mm_storeu_si128(out_mm + i, B); + } + } + +/* +* AES-128 Key Schedule +* +* Derives round keys K0..K10 from the 16-byte key into m_EK, then fills m_DK +* with them in reverse order, applying _mm_aesimc_si128 to all but the first +* and last. The unnamed size_t argument is not used. +* NOTE(review): AES_128_key_exp is not #undef'd in this function, unlike +* AES_192_key_exp in the AES-192 key schedule - confirm that is intended. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_128::aesni_key_schedule(const uint8_t key[], size_t) + { + m_EK.resize(44); + m_DK.resize(44); + + #define AES_128_key_exp(K, RCON) \ + aes_128_key_expansion(K, _mm_aeskeygenassist_si128(K, RCON)) + + const __m128i K0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key)); + const __m128i K1 = AES_128_key_exp(K0, 0x01); + const __m128i K2 = AES_128_key_exp(K1, 0x02); + const __m128i K3 = AES_128_key_exp(K2, 0x04); + const __m128i K4 = AES_128_key_exp(K3, 0x08); + const __m128i K5 = AES_128_key_exp(K4, 0x10); + const __m128i K6 = AES_128_key_exp(K5, 0x20); + const __m128i K7 = AES_128_key_exp(K6, 0x40); + const __m128i K8 = AES_128_key_exp(K7, 0x80); + const __m128i K9 = AES_128_key_exp(K8, 0x1B); + const __m128i K10 = AES_128_key_exp(K9, 
0x36); + + __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data()); + _mm_storeu_si128(EK_mm , K0); + _mm_storeu_si128(EK_mm + 1, K1); + _mm_storeu_si128(EK_mm + 2, K2); + _mm_storeu_si128(EK_mm + 3, K3); + _mm_storeu_si128(EK_mm + 4, K4); + _mm_storeu_si128(EK_mm + 5, K5); + _mm_storeu_si128(EK_mm + 6, K6); + _mm_storeu_si128(EK_mm + 7, K7); + _mm_storeu_si128(EK_mm + 8, K8); + _mm_storeu_si128(EK_mm + 9, K9); + _mm_storeu_si128(EK_mm + 10, K10); + + // Now generate decryption keys + + __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data()); + _mm_storeu_si128(DK_mm , K10); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K9)); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K8)); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K7)); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K6)); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K5)); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K4)); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K3)); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K2)); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K1)); + _mm_storeu_si128(DK_mm + 10, K0); + } + +/* +* AES-192 Encryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 13 round keys +* loaded from m_EK. Each block is XORed with K0 before the round instructions; +* four blocks are handled per iteration while at least four remain, then the +* remainder one block at a time. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_EK.data()); + + const __m128i K0 = _mm_loadu_si128(key_mm); + const __m128i K1 = _mm_loadu_si128(key_mm + 1); + const __m128i K2 = _mm_loadu_si128(key_mm + 2); + const __m128i K3 = _mm_loadu_si128(key_mm + 3); + const __m128i K4 = _mm_loadu_si128(key_mm + 4); + const __m128i K5 = _mm_loadu_si128(key_mm + 5); + const __m128i K6 = _mm_loadu_si128(key_mm + 6); + const __m128i K7 = _mm_loadu_si128(key_mm + 7); + const __m128i K8 = _mm_loadu_si128(key_mm + 8); + const __m128i K9 = _mm_loadu_si128(key_mm + 9); + const __m128i K10 = _mm_loadu_si128(key_mm + 10); + 
const __m128i K11 = _mm_loadu_si128(key_mm + 11); + const __m128i K12 = _mm_loadu_si128(key_mm + 12); + + while(blocks >= 4) + { + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + B0 = _mm_xor_si128(B0, K0); + B1 = _mm_xor_si128(B1, K0); + B2 = _mm_xor_si128(B2, K0); + B3 = _mm_xor_si128(B3, K0); + + AES_ENC_4_ROUNDS(K1); + AES_ENC_4_ROUNDS(K2); + AES_ENC_4_ROUNDS(K3); + AES_ENC_4_ROUNDS(K4); + AES_ENC_4_ROUNDS(K5); + AES_ENC_4_ROUNDS(K6); + AES_ENC_4_ROUNDS(K7); + AES_ENC_4_ROUNDS(K8); + AES_ENC_4_ROUNDS(K9); + AES_ENC_4_ROUNDS(K10); + AES_ENC_4_ROUNDS(K11); + AES_ENC_4_LAST_ROUNDS(K12); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + + blocks -= 4; + in_mm += 4; + out_mm += 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesenc_si128(B, K1); + B = _mm_aesenc_si128(B, K2); + B = _mm_aesenc_si128(B, K3); + B = _mm_aesenc_si128(B, K4); + B = _mm_aesenc_si128(B, K5); + B = _mm_aesenc_si128(B, K6); + B = _mm_aesenc_si128(B, K7); + B = _mm_aesenc_si128(B, K8); + B = _mm_aesenc_si128(B, K9); + B = _mm_aesenc_si128(B, K10); + B = _mm_aesenc_si128(B, K11); + B = _mm_aesenclast_si128(B, K12); + + _mm_storeu_si128(out_mm + i, B); + } + } + +/* +* AES-192 Decryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 13 round keys +* loaded from m_DK. Each block is XORed with K0 before the round instructions; +* four blocks are handled per iteration while at least four remain, then the +* remainder one block at a time. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_DK.data()); + + const __m128i K0 = _mm_loadu_si128(key_mm); + const __m128i K1 = _mm_loadu_si128(key_mm + 1); + const __m128i K2 = _mm_loadu_si128(key_mm + 2); + const __m128i K3 = _mm_loadu_si128(key_mm + 
3); + const __m128i K4 = _mm_loadu_si128(key_mm + 4); + const __m128i K5 = _mm_loadu_si128(key_mm + 5); + const __m128i K6 = _mm_loadu_si128(key_mm + 6); + const __m128i K7 = _mm_loadu_si128(key_mm + 7); + const __m128i K8 = _mm_loadu_si128(key_mm + 8); + const __m128i K9 = _mm_loadu_si128(key_mm + 9); + const __m128i K10 = _mm_loadu_si128(key_mm + 10); + const __m128i K11 = _mm_loadu_si128(key_mm + 11); + const __m128i K12 = _mm_loadu_si128(key_mm + 12); + + while(blocks >= 4) + { + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + B0 = _mm_xor_si128(B0, K0); + B1 = _mm_xor_si128(B1, K0); + B2 = _mm_xor_si128(B2, K0); + B3 = _mm_xor_si128(B3, K0); + + AES_DEC_4_ROUNDS(K1); + AES_DEC_4_ROUNDS(K2); + AES_DEC_4_ROUNDS(K3); + AES_DEC_4_ROUNDS(K4); + AES_DEC_4_ROUNDS(K5); + AES_DEC_4_ROUNDS(K6); + AES_DEC_4_ROUNDS(K7); + AES_DEC_4_ROUNDS(K8); + AES_DEC_4_ROUNDS(K9); + AES_DEC_4_ROUNDS(K10); + AES_DEC_4_ROUNDS(K11); + AES_DEC_4_LAST_ROUNDS(K12); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + + blocks -= 4; + in_mm += 4; + out_mm += 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesdec_si128(B, K1); + B = _mm_aesdec_si128(B, K2); + B = _mm_aesdec_si128(B, K3); + B = _mm_aesdec_si128(B, K4); + B = _mm_aesdec_si128(B, K5); + B = _mm_aesdec_si128(B, K6); + B = _mm_aesdec_si128(B, K7); + B = _mm_aesdec_si128(B, K8); + B = _mm_aesdec_si128(B, K9); + B = _mm_aesdec_si128(B, K10); + B = _mm_aesdec_si128(B, K11); + B = _mm_aesdeclast_si128(B, K12); + + _mm_storeu_si128(out_mm + i, B); + } + } + +/* +* AES-192 Key Schedule +* +* Seeds the first six words of m_EK from key via load_le, then runs eight +* aes_192_key_expansion steps writing at word offsets 6, 12, ..., 48 (the +* step at offset 48 is flagged as last). m_DK is then filled with the 13 +* round keys of m_EK in reverse order, applying _mm_aesimc_si128 to all but +* the first and last. The unnamed size_t argument is not used. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_192::aesni_key_schedule(const uint8_t key[], size_t) + { + m_EK.resize(52); + m_DK.resize(52); + + __m128i K0 
= _mm_loadu_si128(reinterpret_cast<const __m128i*>(key)); + __m128i K1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key + 8)); + K1 = _mm_srli_si128(K1, 8); + + load_le(m_EK.data(), key, 6); + + #define AES_192_key_exp(RCON, EK_OFF) \ + aes_192_key_expansion(&K0, &K1, \ + _mm_aeskeygenassist_si128(K1, RCON), \ + &m_EK[EK_OFF], EK_OFF == 48) + + AES_192_key_exp(0x01, 6); + AES_192_key_exp(0x02, 12); + AES_192_key_exp(0x04, 18); + AES_192_key_exp(0x08, 24); + AES_192_key_exp(0x10, 30); + AES_192_key_exp(0x20, 36); + AES_192_key_exp(0x40, 42); + AES_192_key_exp(0x80, 48); + + #undef AES_192_key_exp + + // Now generate decryption keys + const __m128i* EK_mm = reinterpret_cast<const __m128i*>(m_EK.data()); + + __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data()); + _mm_storeu_si128(DK_mm , _mm_loadu_si128(EK_mm + 12)); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 11))); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 10))); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 9))); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 8))); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 7))); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 6))); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 5))); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 4))); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 3))); + _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 2))); + _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(_mm_loadu_si128(EK_mm + 1))); + _mm_storeu_si128(DK_mm + 12, _mm_loadu_si128(EK_mm + 0)); + } + +/* +* AES-256 Encryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 15 round keys +* loaded from m_EK. Each block is XORed with K0 before the round instructions; +* four blocks are handled per iteration while at least four remain, then the +* remainder one block at a time. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = 
reinterpret_cast<__m128i*>(out); + + const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_EK.data()); + + const __m128i K0 = _mm_loadu_si128(key_mm); + const __m128i K1 = _mm_loadu_si128(key_mm + 1); + const __m128i K2 = _mm_loadu_si128(key_mm + 2); + const __m128i K3 = _mm_loadu_si128(key_mm + 3); + const __m128i K4 = _mm_loadu_si128(key_mm + 4); + const __m128i K5 = _mm_loadu_si128(key_mm + 5); + const __m128i K6 = _mm_loadu_si128(key_mm + 6); + const __m128i K7 = _mm_loadu_si128(key_mm + 7); + const __m128i K8 = _mm_loadu_si128(key_mm + 8); + const __m128i K9 = _mm_loadu_si128(key_mm + 9); + const __m128i K10 = _mm_loadu_si128(key_mm + 10); + const __m128i K11 = _mm_loadu_si128(key_mm + 11); + const __m128i K12 = _mm_loadu_si128(key_mm + 12); + const __m128i K13 = _mm_loadu_si128(key_mm + 13); + const __m128i K14 = _mm_loadu_si128(key_mm + 14); + + while(blocks >= 4) + { + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + B0 = _mm_xor_si128(B0, K0); + B1 = _mm_xor_si128(B1, K0); + B2 = _mm_xor_si128(B2, K0); + B3 = _mm_xor_si128(B3, K0); + + AES_ENC_4_ROUNDS(K1); + AES_ENC_4_ROUNDS(K2); + AES_ENC_4_ROUNDS(K3); + AES_ENC_4_ROUNDS(K4); + AES_ENC_4_ROUNDS(K5); + AES_ENC_4_ROUNDS(K6); + AES_ENC_4_ROUNDS(K7); + AES_ENC_4_ROUNDS(K8); + AES_ENC_4_ROUNDS(K9); + AES_ENC_4_ROUNDS(K10); + AES_ENC_4_ROUNDS(K11); + AES_ENC_4_ROUNDS(K12); + AES_ENC_4_ROUNDS(K13); + AES_ENC_4_LAST_ROUNDS(K14); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + + blocks -= 4; + in_mm += 4; + out_mm += 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesenc_si128(B, K1); + B = _mm_aesenc_si128(B, K2); + B = _mm_aesenc_si128(B, K3); + B = _mm_aesenc_si128(B, K4); + B = _mm_aesenc_si128(B, 
K5); + B = _mm_aesenc_si128(B, K6); + B = _mm_aesenc_si128(B, K7); + B = _mm_aesenc_si128(B, K8); + B = _mm_aesenc_si128(B, K9); + B = _mm_aesenc_si128(B, K10); + B = _mm_aesenc_si128(B, K11); + B = _mm_aesenc_si128(B, K12); + B = _mm_aesenc_si128(B, K13); + B = _mm_aesenclast_si128(B, K14); + + _mm_storeu_si128(out_mm + i, B); + } + } + +/* +* AES-256 Decryption +* +* Processes `blocks` 16-byte blocks from in[] to out[] using the 15 round keys +* loaded from m_DK. Each block is XORed with K0 before the round instructions; +* four blocks are handled per iteration while at least four remain, then the +* remainder one block at a time. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + const __m128i* key_mm = reinterpret_cast<const __m128i*>(m_DK.data()); + + const __m128i K0 = _mm_loadu_si128(key_mm); + const __m128i K1 = _mm_loadu_si128(key_mm + 1); + const __m128i K2 = _mm_loadu_si128(key_mm + 2); + const __m128i K3 = _mm_loadu_si128(key_mm + 3); + const __m128i K4 = _mm_loadu_si128(key_mm + 4); + const __m128i K5 = _mm_loadu_si128(key_mm + 5); + const __m128i K6 = _mm_loadu_si128(key_mm + 6); + const __m128i K7 = _mm_loadu_si128(key_mm + 7); + const __m128i K8 = _mm_loadu_si128(key_mm + 8); + const __m128i K9 = _mm_loadu_si128(key_mm + 9); + const __m128i K10 = _mm_loadu_si128(key_mm + 10); + const __m128i K11 = _mm_loadu_si128(key_mm + 11); + const __m128i K12 = _mm_loadu_si128(key_mm + 12); + const __m128i K13 = _mm_loadu_si128(key_mm + 13); + const __m128i K14 = _mm_loadu_si128(key_mm + 14); + + while(blocks >= 4) + { + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + B0 = _mm_xor_si128(B0, K0); + B1 = _mm_xor_si128(B1, K0); + B2 = _mm_xor_si128(B2, K0); + B3 = _mm_xor_si128(B3, K0); + + AES_DEC_4_ROUNDS(K1); + AES_DEC_4_ROUNDS(K2); + AES_DEC_4_ROUNDS(K3); + AES_DEC_4_ROUNDS(K4); + AES_DEC_4_ROUNDS(K5); + AES_DEC_4_ROUNDS(K6); + AES_DEC_4_ROUNDS(K7); + AES_DEC_4_ROUNDS(K8); + AES_DEC_4_ROUNDS(K9); + 
AES_DEC_4_ROUNDS(K10); + AES_DEC_4_ROUNDS(K11); + AES_DEC_4_ROUNDS(K12); + AES_DEC_4_ROUNDS(K13); + AES_DEC_4_LAST_ROUNDS(K14); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B1); + _mm_storeu_si128(out_mm + 2, B2); + _mm_storeu_si128(out_mm + 3, B3); + + blocks -= 4; + in_mm += 4; + out_mm += 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m128i B = _mm_loadu_si128(in_mm + i); + + B = _mm_xor_si128(B, K0); + + B = _mm_aesdec_si128(B, K1); + B = _mm_aesdec_si128(B, K2); + B = _mm_aesdec_si128(B, K3); + B = _mm_aesdec_si128(B, K4); + B = _mm_aesdec_si128(B, K5); + B = _mm_aesdec_si128(B, K6); + B = _mm_aesdec_si128(B, K7); + B = _mm_aesdec_si128(B, K8); + B = _mm_aesdec_si128(B, K9); + B = _mm_aesdec_si128(B, K10); + B = _mm_aesdec_si128(B, K11); + B = _mm_aesdec_si128(B, K12); + B = _mm_aesdec_si128(B, K13); + B = _mm_aesdeclast_si128(B, K14); + + _mm_storeu_si128(out_mm + i, B); + } + } + +/* +* AES-256 Key Schedule +* +* K0 and K1 are the two 16-byte halves of key. Even-numbered round keys +* K2..K14 come from aes_128_key_expansion and odd-numbered K3..K13 from +* aes_256_key_expansion. K0..K14 are stored to m_EK; m_DK receives them in +* reverse order, applying _mm_aesimc_si128 to all but the first and last. +* The unnamed size_t argument is not used. +*/ +BOTAN_FUNC_ISA("ssse3,aes") +void AES_256::aesni_key_schedule(const uint8_t key[], size_t) + { + m_EK.resize(60); + m_DK.resize(60); + + const __m128i K0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key)); + const __m128i K1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key + 16)); + + const __m128i K2 = aes_128_key_expansion(K0, _mm_aeskeygenassist_si128(K1, 0x01)); + const __m128i K3 = aes_256_key_expansion(K1, K2); + + const __m128i K4 = aes_128_key_expansion(K2, _mm_aeskeygenassist_si128(K3, 0x02)); + const __m128i K5 = aes_256_key_expansion(K3, K4); + + const __m128i K6 = aes_128_key_expansion(K4, _mm_aeskeygenassist_si128(K5, 0x04)); + const __m128i K7 = aes_256_key_expansion(K5, K6); + + const __m128i K8 = aes_128_key_expansion(K6, _mm_aeskeygenassist_si128(K7, 0x08)); + const __m128i K9 = aes_256_key_expansion(K7, K8); + + const __m128i K10 = aes_128_key_expansion(K8, _mm_aeskeygenassist_si128(K9, 0x10)); + const __m128i K11 = aes_256_key_expansion(K9, K10); + + const __m128i K12 = 
aes_128_key_expansion(K10, _mm_aeskeygenassist_si128(K11, 0x20)); + const __m128i K13 = aes_256_key_expansion(K11, K12); + + const __m128i K14 = aes_128_key_expansion(K12, _mm_aeskeygenassist_si128(K13, 0x40)); + + __m128i* EK_mm = reinterpret_cast<__m128i*>(m_EK.data()); + _mm_storeu_si128(EK_mm , K0); + _mm_storeu_si128(EK_mm + 1, K1); + _mm_storeu_si128(EK_mm + 2, K2); + _mm_storeu_si128(EK_mm + 3, K3); + _mm_storeu_si128(EK_mm + 4, K4); + _mm_storeu_si128(EK_mm + 5, K5); + _mm_storeu_si128(EK_mm + 6, K6); + _mm_storeu_si128(EK_mm + 7, K7); + _mm_storeu_si128(EK_mm + 8, K8); + _mm_storeu_si128(EK_mm + 9, K9); + _mm_storeu_si128(EK_mm + 10, K10); + _mm_storeu_si128(EK_mm + 11, K11); + _mm_storeu_si128(EK_mm + 12, K12); + _mm_storeu_si128(EK_mm + 13, K13); + _mm_storeu_si128(EK_mm + 14, K14); + + // Now generate decryption keys + __m128i* DK_mm = reinterpret_cast<__m128i*>(m_DK.data()); + _mm_storeu_si128(DK_mm , K14); + _mm_storeu_si128(DK_mm + 1, _mm_aesimc_si128(K13)); + _mm_storeu_si128(DK_mm + 2, _mm_aesimc_si128(K12)); + _mm_storeu_si128(DK_mm + 3, _mm_aesimc_si128(K11)); + _mm_storeu_si128(DK_mm + 4, _mm_aesimc_si128(K10)); + _mm_storeu_si128(DK_mm + 5, _mm_aesimc_si128(K9)); + _mm_storeu_si128(DK_mm + 6, _mm_aesimc_si128(K8)); + _mm_storeu_si128(DK_mm + 7, _mm_aesimc_si128(K7)); + _mm_storeu_si128(DK_mm + 8, _mm_aesimc_si128(K6)); + _mm_storeu_si128(DK_mm + 9, _mm_aesimc_si128(K5)); + _mm_storeu_si128(DK_mm + 10, _mm_aesimc_si128(K4)); + _mm_storeu_si128(DK_mm + 11, _mm_aesimc_si128(K3)); + _mm_storeu_si128(DK_mm + 12, _mm_aesimc_si128(K2)); + _mm_storeu_si128(DK_mm + 13, _mm_aesimc_si128(K1)); + _mm_storeu_si128(DK_mm + 14, K0); + } + +#undef AES_ENC_4_ROUNDS +#undef AES_ENC_4_LAST_ROUNDS +#undef AES_DEC_4_ROUNDS +#undef AES_DEC_4_LAST_ROUNDS + +} diff --git a/comm/third_party/botan/src/lib/block/aes/aes_ni/info.txt b/comm/third_party/botan/src/lib/block/aes/aes_ni/info.txt new file mode 100644 index 0000000000..2e9749fb8e --- /dev/null +++ 
b/comm/third_party/botan/src/lib/block/aes/aes_ni/info.txt @@ -0,0 +1,9 @@ +<defines> +AES_NI -> 20131128 +</defines> + +<isa> +sse2 +ssse3 +aesni +</isa> diff --git a/comm/third_party/botan/src/lib/block/aes/aes_power8/aes_power8.cpp b/comm/third_party/botan/src/lib/block/aes/aes_power8/aes_power8.cpp new file mode 100644 index 0000000000..18bc85933b --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes_power8/aes_power8.cpp @@ -0,0 +1,529 @@ +/* +* AES using POWER8/POWER9 crypto extensions +* +* Contributed by Jeffrey Walton +* +* Further changes +* (C) 2018,2019 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/cpuid.h> + +#include <altivec.h> +#undef vector +#undef bool + +namespace Botan { + +typedef __vector unsigned long long Altivec64x2; +typedef __vector unsigned int Altivec32x4; +typedef __vector unsigned char Altivec8x16; + +namespace { + +inline Altivec8x16 reverse_vec(Altivec8x16 src) + { + if(CPUID::is_little_endian()) + { + const Altivec8x16 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; + const Altivec8x16 zero = {0}; + return vec_perm(src, zero, mask); + } + else + { + return src; + } + } + +inline Altivec64x2 load_key(const uint32_t key[]) + { + return (Altivec64x2)reverse_vec((Altivec8x16)vec_vsx_ld(0, key));; + } + +inline Altivec64x2 load_block(const uint8_t src[]) + { + return (Altivec64x2)reverse_vec(vec_vsx_ld(0, src)); + } + +inline void store_block(Altivec64x2 src, uint8_t dest[]) + { + vec_vsx_st(reverse_vec((Altivec8x16)src), 0, dest); + } + +inline void store_blocks(Altivec64x2 B0, Altivec64x2 B1, + Altivec64x2 B2, Altivec64x2 B3, + uint8_t out[]) + { + store_block(B0, out); + store_block(B1, out+16); + store_block(B2, out+16*2); + store_block(B3, out+16*3); + } + +#define AES_XOR_4(B0, B1, B2, B3, K) do { \ + B0 = vec_xor(B0, K); \ + B1 = vec_xor(B1, K); \ + B2 = vec_xor(B2, K); \ + B3 = vec_xor(B3, K); \ + } while(0) + +#define 
AES_ENCRYPT_4(B0, B1, B2, B3, K) do { \ + B0 = __builtin_crypto_vcipher(B0, K); \ + B1 = __builtin_crypto_vcipher(B1, K); \ + B2 = __builtin_crypto_vcipher(B2, K); \ + B3 = __builtin_crypto_vcipher(B3, K); \ + } while(0) + +#define AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K) do { \ + B0 = __builtin_crypto_vcipherlast(B0, K); \ + B1 = __builtin_crypto_vcipherlast(B1, K); \ + B2 = __builtin_crypto_vcipherlast(B2, K); \ + B3 = __builtin_crypto_vcipherlast(B3, K); \ + } while(0) + +#define AES_DECRYPT_4(B0, B1, B2, B3, K) do { \ + B0 = __builtin_crypto_vncipher(B0, K); \ + B1 = __builtin_crypto_vncipher(B1, K); \ + B2 = __builtin_crypto_vncipher(B2, K); \ + B3 = __builtin_crypto_vncipher(B3, K); \ + } while(0) + +#define AES_DECRYPT_4_LAST(B0, B1, B2, B3, K) do { \ + B0 = __builtin_crypto_vncipherlast(B0, K); \ + B1 = __builtin_crypto_vncipherlast(B1, K); \ + B2 = __builtin_crypto_vncipherlast(B2, K); \ + B3 = __builtin_crypto_vncipherlast(B3, K); \ + } while(0) + +} + +BOTAN_FUNC_ISA("crypto") +void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const Altivec64x2 K0 = load_key(&m_EK[0]); + const Altivec64x2 K1 = load_key(&m_EK[4]); + const Altivec64x2 K2 = load_key(&m_EK[8]); + const Altivec64x2 K3 = load_key(&m_EK[12]); + const Altivec64x2 K4 = load_key(&m_EK[16]); + const Altivec64x2 K5 = load_key(&m_EK[20]); + const Altivec64x2 K6 = load_key(&m_EK[24]); + const Altivec64x2 K7 = load_key(&m_EK[28]); + const Altivec64x2 K8 = load_key(&m_EK[32]); + const Altivec64x2 K9 = load_key(&m_EK[36]); + const Altivec64x2 K10 = load_key(&m_EK[40]); + + while(blocks >= 4) + { + Altivec64x2 B0 = load_block(in); + Altivec64x2 B1 = load_block(in+16); + Altivec64x2 B2 = load_block(in+16*2); + Altivec64x2 B3 = load_block(in+16*3); + + AES_XOR_4(B0, B1, B2, B3, K0); + AES_ENCRYPT_4(B0, B1, B2, B3, K1); + AES_ENCRYPT_4(B0, B1, B2, B3, K2); + AES_ENCRYPT_4(B0, B1, B2, B3, K3); + AES_ENCRYPT_4(B0, B1, B2, B3, K4); + AES_ENCRYPT_4(B0, B1, B2, B3, K5); + 
AES_ENCRYPT_4(B0, B1, B2, B3, K6); + AES_ENCRYPT_4(B0, B1, B2, B3, K7); + AES_ENCRYPT_4(B0, B1, B2, B3, K8); + AES_ENCRYPT_4(B0, B1, B2, B3, K9); + AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K10); + + store_blocks(B0, B1, B2, B3, out); + + out += 4*16; + in += 4*16; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + Altivec64x2 B = load_block(in); + + B = vec_xor(B, K0); + B = __builtin_crypto_vcipher(B, K1); + B = __builtin_crypto_vcipher(B, K2); + B = __builtin_crypto_vcipher(B, K3); + B = __builtin_crypto_vcipher(B, K4); + B = __builtin_crypto_vcipher(B, K5); + B = __builtin_crypto_vcipher(B, K6); + B = __builtin_crypto_vcipher(B, K7); + B = __builtin_crypto_vcipher(B, K8); + B = __builtin_crypto_vcipher(B, K9); + B = __builtin_crypto_vcipherlast(B, K10); + + store_block(B, out); + + out += 16; + in += 16; + } + } + +BOTAN_FUNC_ISA("crypto") +void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const Altivec64x2 K0 = load_key(&m_EK[40]); + const Altivec64x2 K1 = load_key(&m_EK[36]); + const Altivec64x2 K2 = load_key(&m_EK[32]); + const Altivec64x2 K3 = load_key(&m_EK[28]); + const Altivec64x2 K4 = load_key(&m_EK[24]); + const Altivec64x2 K5 = load_key(&m_EK[20]); + const Altivec64x2 K6 = load_key(&m_EK[16]); + const Altivec64x2 K7 = load_key(&m_EK[12]); + const Altivec64x2 K8 = load_key(&m_EK[8]); + const Altivec64x2 K9 = load_key(&m_EK[4]); + const Altivec64x2 K10 = load_key(&m_EK[0]); + + while(blocks >= 4) + { + Altivec64x2 B0 = load_block(in); + Altivec64x2 B1 = load_block(in+16); + Altivec64x2 B2 = load_block(in+16*2); + Altivec64x2 B3 = load_block(in+16*3); + + AES_XOR_4(B0, B1, B2, B3, K0); + AES_DECRYPT_4(B0, B1, B2, B3, K1); + AES_DECRYPT_4(B0, B1, B2, B3, K2); + AES_DECRYPT_4(B0, B1, B2, B3, K3); + AES_DECRYPT_4(B0, B1, B2, B3, K4); + AES_DECRYPT_4(B0, B1, B2, B3, K5); + AES_DECRYPT_4(B0, B1, B2, B3, K6); + AES_DECRYPT_4(B0, B1, B2, B3, K7); + AES_DECRYPT_4(B0, B1, B2, B3, K8); + AES_DECRYPT_4(B0, B1, B2, 
B3, K9); + AES_DECRYPT_4_LAST(B0, B1, B2, B3, K10); + + store_blocks(B0, B1, B2, B3, out); + + out += 4*16; + in += 4*16; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + Altivec64x2 B = load_block(in); + + B = vec_xor(B, K0); + B = __builtin_crypto_vncipher(B, K1); + B = __builtin_crypto_vncipher(B, K2); + B = __builtin_crypto_vncipher(B, K3); + B = __builtin_crypto_vncipher(B, K4); + B = __builtin_crypto_vncipher(B, K5); + B = __builtin_crypto_vncipher(B, K6); + B = __builtin_crypto_vncipher(B, K7); + B = __builtin_crypto_vncipher(B, K8); + B = __builtin_crypto_vncipher(B, K9); + B = __builtin_crypto_vncipherlast(B, K10); + + store_block(B, out); + + out += 16; + in += 16; + } + } + +BOTAN_FUNC_ISA("crypto") +void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const Altivec64x2 K0 = load_key(&m_EK[0]); + const Altivec64x2 K1 = load_key(&m_EK[4]); + const Altivec64x2 K2 = load_key(&m_EK[8]); + const Altivec64x2 K3 = load_key(&m_EK[12]); + const Altivec64x2 K4 = load_key(&m_EK[16]); + const Altivec64x2 K5 = load_key(&m_EK[20]); + const Altivec64x2 K6 = load_key(&m_EK[24]); + const Altivec64x2 K7 = load_key(&m_EK[28]); + const Altivec64x2 K8 = load_key(&m_EK[32]); + const Altivec64x2 K9 = load_key(&m_EK[36]); + const Altivec64x2 K10 = load_key(&m_EK[40]); + const Altivec64x2 K11 = load_key(&m_EK[44]); + const Altivec64x2 K12 = load_key(&m_EK[48]); + + while(blocks >= 4) + { + Altivec64x2 B0 = load_block(in); + Altivec64x2 B1 = load_block(in+16); + Altivec64x2 B2 = load_block(in+16*2); + Altivec64x2 B3 = load_block(in+16*3); + + AES_XOR_4(B0, B1, B2, B3, K0); + AES_ENCRYPT_4(B0, B1, B2, B3, K1); + AES_ENCRYPT_4(B0, B1, B2, B3, K2); + AES_ENCRYPT_4(B0, B1, B2, B3, K3); + AES_ENCRYPT_4(B0, B1, B2, B3, K4); + AES_ENCRYPT_4(B0, B1, B2, B3, K5); + AES_ENCRYPT_4(B0, B1, B2, B3, K6); + AES_ENCRYPT_4(B0, B1, B2, B3, K7); + AES_ENCRYPT_4(B0, B1, B2, B3, K8); + AES_ENCRYPT_4(B0, B1, B2, B3, K9); + AES_ENCRYPT_4(B0, B1, 
B2, B3, K10); + AES_ENCRYPT_4(B0, B1, B2, B3, K11); + AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K12); + + store_blocks(B0, B1, B2, B3, out); + + out += 4*16; + in += 4*16; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + Altivec64x2 B = load_block(in); + + B = vec_xor(B, K0); + B = __builtin_crypto_vcipher(B, K1); + B = __builtin_crypto_vcipher(B, K2); + B = __builtin_crypto_vcipher(B, K3); + B = __builtin_crypto_vcipher(B, K4); + B = __builtin_crypto_vcipher(B, K5); + B = __builtin_crypto_vcipher(B, K6); + B = __builtin_crypto_vcipher(B, K7); + B = __builtin_crypto_vcipher(B, K8); + B = __builtin_crypto_vcipher(B, K9); + B = __builtin_crypto_vcipher(B, K10); + B = __builtin_crypto_vcipher(B, K11); + B = __builtin_crypto_vcipherlast(B, K12); + + store_block(B, out); + + out += 16; + in += 16; + } + } + +BOTAN_FUNC_ISA("crypto") +void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const Altivec64x2 K0 = load_key(&m_EK[48]); + const Altivec64x2 K1 = load_key(&m_EK[44]); + const Altivec64x2 K2 = load_key(&m_EK[40]); + const Altivec64x2 K3 = load_key(&m_EK[36]); + const Altivec64x2 K4 = load_key(&m_EK[32]); + const Altivec64x2 K5 = load_key(&m_EK[28]); + const Altivec64x2 K6 = load_key(&m_EK[24]); + const Altivec64x2 K7 = load_key(&m_EK[20]); + const Altivec64x2 K8 = load_key(&m_EK[16]); + const Altivec64x2 K9 = load_key(&m_EK[12]); + const Altivec64x2 K10 = load_key(&m_EK[8]); + const Altivec64x2 K11 = load_key(&m_EK[4]); + const Altivec64x2 K12 = load_key(&m_EK[0]); + + while(blocks >= 4) + { + Altivec64x2 B0 = load_block(in); + Altivec64x2 B1 = load_block(in+16); + Altivec64x2 B2 = load_block(in+16*2); + Altivec64x2 B3 = load_block(in+16*3); + + AES_XOR_4(B0, B1, B2, B3, K0); + AES_DECRYPT_4(B0, B1, B2, B3, K1); + AES_DECRYPT_4(B0, B1, B2, B3, K2); + AES_DECRYPT_4(B0, B1, B2, B3, K3); + AES_DECRYPT_4(B0, B1, B2, B3, K4); + AES_DECRYPT_4(B0, B1, B2, B3, K5); + AES_DECRYPT_4(B0, B1, B2, B3, K6); + AES_DECRYPT_4(B0, B1, 
B2, B3, K7); + AES_DECRYPT_4(B0, B1, B2, B3, K8); + AES_DECRYPT_4(B0, B1, B2, B3, K9); + AES_DECRYPT_4(B0, B1, B2, B3, K10); + AES_DECRYPT_4(B0, B1, B2, B3, K11); + AES_DECRYPT_4_LAST(B0, B1, B2, B3, K12); + + store_blocks(B0, B1, B2, B3, out); + + out += 4*16; + in += 4*16; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + Altivec64x2 B = load_block(in); + + B = vec_xor(B, K0); + B = __builtin_crypto_vncipher(B, K1); + B = __builtin_crypto_vncipher(B, K2); + B = __builtin_crypto_vncipher(B, K3); + B = __builtin_crypto_vncipher(B, K4); + B = __builtin_crypto_vncipher(B, K5); + B = __builtin_crypto_vncipher(B, K6); + B = __builtin_crypto_vncipher(B, K7); + B = __builtin_crypto_vncipher(B, K8); + B = __builtin_crypto_vncipher(B, K9); + B = __builtin_crypto_vncipher(B, K10); + B = __builtin_crypto_vncipher(B, K11); + B = __builtin_crypto_vncipherlast(B, K12); + + store_block(B, out); + + out += 16; + in += 16; + } + } + +BOTAN_FUNC_ISA("crypto") +void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const Altivec64x2 K0 = load_key(&m_EK[0]); + const Altivec64x2 K1 = load_key(&m_EK[4]); + const Altivec64x2 K2 = load_key(&m_EK[8]); + const Altivec64x2 K3 = load_key(&m_EK[12]); + const Altivec64x2 K4 = load_key(&m_EK[16]); + const Altivec64x2 K5 = load_key(&m_EK[20]); + const Altivec64x2 K6 = load_key(&m_EK[24]); + const Altivec64x2 K7 = load_key(&m_EK[28]); + const Altivec64x2 K8 = load_key(&m_EK[32]); + const Altivec64x2 K9 = load_key(&m_EK[36]); + const Altivec64x2 K10 = load_key(&m_EK[40]); + const Altivec64x2 K11 = load_key(&m_EK[44]); + const Altivec64x2 K12 = load_key(&m_EK[48]); + const Altivec64x2 K13 = load_key(&m_EK[52]); + const Altivec64x2 K14 = load_key(&m_EK[56]); + + while(blocks >= 4) + { + Altivec64x2 B0 = load_block(in); + Altivec64x2 B1 = load_block(in+16); + Altivec64x2 B2 = load_block(in+16*2); + Altivec64x2 B3 = load_block(in+16*3); + + AES_XOR_4(B0, B1, B2, B3, K0); + AES_ENCRYPT_4(B0, B1, B2, 
B3, K1); + AES_ENCRYPT_4(B0, B1, B2, B3, K2); + AES_ENCRYPT_4(B0, B1, B2, B3, K3); + AES_ENCRYPT_4(B0, B1, B2, B3, K4); + AES_ENCRYPT_4(B0, B1, B2, B3, K5); + AES_ENCRYPT_4(B0, B1, B2, B3, K6); + AES_ENCRYPT_4(B0, B1, B2, B3, K7); + AES_ENCRYPT_4(B0, B1, B2, B3, K8); + AES_ENCRYPT_4(B0, B1, B2, B3, K9); + AES_ENCRYPT_4(B0, B1, B2, B3, K10); + AES_ENCRYPT_4(B0, B1, B2, B3, K11); + AES_ENCRYPT_4(B0, B1, B2, B3, K12); + AES_ENCRYPT_4(B0, B1, B2, B3, K13); + AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K14); + + store_blocks(B0, B1, B2, B3, out); + + out += 4*16; + in += 4*16; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + Altivec64x2 B = load_block(in); + + B = vec_xor(B, K0); + B = __builtin_crypto_vcipher(B, K1); + B = __builtin_crypto_vcipher(B, K2); + B = __builtin_crypto_vcipher(B, K3); + B = __builtin_crypto_vcipher(B, K4); + B = __builtin_crypto_vcipher(B, K5); + B = __builtin_crypto_vcipher(B, K6); + B = __builtin_crypto_vcipher(B, K7); + B = __builtin_crypto_vcipher(B, K8); + B = __builtin_crypto_vcipher(B, K9); + B = __builtin_crypto_vcipher(B, K10); + B = __builtin_crypto_vcipher(B, K11); + B = __builtin_crypto_vcipher(B, K12); + B = __builtin_crypto_vcipher(B, K13); + B = __builtin_crypto_vcipherlast(B, K14); + + store_block(B, out); + + out += 16; + in += 16; + } + } + +BOTAN_FUNC_ISA("crypto") +void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const Altivec64x2 K0 = load_key(&m_EK[56]); + const Altivec64x2 K1 = load_key(&m_EK[52]); + const Altivec64x2 K2 = load_key(&m_EK[48]); + const Altivec64x2 K3 = load_key(&m_EK[44]); + const Altivec64x2 K4 = load_key(&m_EK[40]); + const Altivec64x2 K5 = load_key(&m_EK[36]); + const Altivec64x2 K6 = load_key(&m_EK[32]); + const Altivec64x2 K7 = load_key(&m_EK[28]); + const Altivec64x2 K8 = load_key(&m_EK[24]); + const Altivec64x2 K9 = load_key(&m_EK[20]); + const Altivec64x2 K10 = load_key(&m_EK[16]); + const Altivec64x2 K11 = load_key(&m_EK[12]); + const Altivec64x2 
K12 = load_key(&m_EK[8]); + const Altivec64x2 K13 = load_key(&m_EK[4]); + const Altivec64x2 K14 = load_key(&m_EK[0]); + + while(blocks >= 4) + { + Altivec64x2 B0 = load_block(in); + Altivec64x2 B1 = load_block(in+16); + Altivec64x2 B2 = load_block(in+16*2); + Altivec64x2 B3 = load_block(in+16*3); + + AES_XOR_4(B0, B1, B2, B3, K0); + AES_DECRYPT_4(B0, B1, B2, B3, K1); + AES_DECRYPT_4(B0, B1, B2, B3, K2); + AES_DECRYPT_4(B0, B1, B2, B3, K3); + AES_DECRYPT_4(B0, B1, B2, B3, K4); + AES_DECRYPT_4(B0, B1, B2, B3, K5); + AES_DECRYPT_4(B0, B1, B2, B3, K6); + AES_DECRYPT_4(B0, B1, B2, B3, K7); + AES_DECRYPT_4(B0, B1, B2, B3, K8); + AES_DECRYPT_4(B0, B1, B2, B3, K9); + AES_DECRYPT_4(B0, B1, B2, B3, K10); + AES_DECRYPT_4(B0, B1, B2, B3, K11); + AES_DECRYPT_4(B0, B1, B2, B3, K12); + AES_DECRYPT_4(B0, B1, B2, B3, K13); + AES_DECRYPT_4_LAST(B0, B1, B2, B3, K14); + + store_blocks(B0, B1, B2, B3, out); + + out += 4*16; + in += 4*16; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + Altivec64x2 B = load_block(in); + + B = vec_xor(B, K0); + B = __builtin_crypto_vncipher(B, K1); + B = __builtin_crypto_vncipher(B, K2); + B = __builtin_crypto_vncipher(B, K3); + B = __builtin_crypto_vncipher(B, K4); + B = __builtin_crypto_vncipher(B, K5); + B = __builtin_crypto_vncipher(B, K6); + B = __builtin_crypto_vncipher(B, K7); + B = __builtin_crypto_vncipher(B, K8); + B = __builtin_crypto_vncipher(B, K9); + B = __builtin_crypto_vncipher(B, K10); + B = __builtin_crypto_vncipher(B, K11); + B = __builtin_crypto_vncipher(B, K12); + B = __builtin_crypto_vncipher(B, K13); + B = __builtin_crypto_vncipherlast(B, K14); + + store_block(B, out); + + out += 16; + in += 16; + } + } + +#undef AES_XOR_4 +#undef AES_ENCRYPT_4 +#undef AES_ENCRYPT_4_LAST +#undef AES_DECRYPT_4 +#undef AES_DECRYPT_4_LAST + +} diff --git a/comm/third_party/botan/src/lib/block/aes/aes_power8/info.txt b/comm/third_party/botan/src/lib/block/aes/aes_power8/info.txt new file mode 100644 index 0000000000..df569edd50 --- 
/dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes_power8/info.txt @@ -0,0 +1,11 @@ +<defines> +AES_POWER8 -> 20180223 +</defines> + +<arch> +ppc64 +</arch> + +<isa> +powercrypto +</isa> diff --git a/comm/third_party/botan/src/lib/block/aes/aes_vperm/aes_vperm.cpp b/comm/third_party/botan/src/lib/block/aes/aes_vperm/aes_vperm.cpp new file mode 100644 index 0000000000..4ae6bb2236 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes_vperm/aes_vperm.cpp @@ -0,0 +1,627 @@ +/* +* AES using vector permutes (SSSE3, NEON) +* (C) 2010,2016,2019 Jack Lloyd +* +* Based on public domain x86-64 assembly written by Mike Hamburg, +* described in "Accelerating AES with Vector Permute Instructions" +* (CHES 2009). His original code is available at +* https://crypto.stanford.edu/vpaes/ +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/aes.h> +#include <botan/internal/ct_utils.h> +#include <botan/internal/simd_32.h> + +#if defined(BOTAN_SIMD_USE_SSE2) + #include <tmmintrin.h> +#endif + +namespace Botan { + +namespace { + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) shuffle(SIMD_4x32 a, SIMD_4x32 b) + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_shuffle_epi8(a.raw(), b.raw())); +#elif defined(BOTAN_SIMD_USE_NEON) + const uint8x16_t tbl = vreinterpretq_u8_u32(a.raw()); + const uint8x16_t idx = vreinterpretq_u8_u32(b.raw()); + +#if defined(BOTAN_TARGET_ARCH_IS_ARM32) + const uint8x8x2_t tbl2 = { vget_low_u8(tbl), vget_high_u8(tbl) }; + + return SIMD_4x32(vreinterpretq_u32_u8( + vcombine_u8(vtbl2_u8(tbl2, vget_low_u8(idx)), + vtbl2_u8(tbl2, vget_high_u8(idx))))); + +#else + return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(tbl, idx))); +#endif + +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + + const auto zero = vec_splat_s8(0x00); + const auto mask = vec_cmplt((__vector signed char)b.raw(), zero); + const auto r = vec_perm((__vector signed char)a.raw(), (__vector signed char)a.raw(), (__vector unsigned 
char)b.raw()); + return SIMD_4x32((__vector unsigned int)vec_sel(r, zero, mask)); + +#else + #error "No shuffle implementation available" +#endif + } + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) alignr8(SIMD_4x32 a, SIMD_4x32 b) + { +#if defined(BOTAN_SIMD_USE_SSE2) + return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 8)); +#elif defined(BOTAN_SIMD_USE_NEON) + return SIMD_4x32(vextq_u32(b.raw(), a.raw(), 2)); +#elif defined(BOTAN_SIMD_USE_ALTIVEC) + const __vector unsigned char mask = {8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23}; + return SIMD_4x32(vec_perm(b.raw(), a.raw(), mask)); +#else + #error "No alignr8 implementation available" +#endif + } + +const SIMD_4x32 k_ipt1 = SIMD_4x32(0x5A2A7000, 0xC2B2E898, 0x52227808, 0xCABAE090); +const SIMD_4x32 k_ipt2 = SIMD_4x32(0x317C4D00, 0x4C01307D, 0xB0FDCC81, 0xCD80B1FC); + +const SIMD_4x32 k_inv1 = SIMD_4x32(0x0D080180, 0x0E05060F, 0x0A0B0C02, 0x04070309); +const SIMD_4x32 k_inv2 = SIMD_4x32(0x0F0B0780, 0x01040A06, 0x02050809, 0x030D0E0C); + +const SIMD_4x32 sb1u = SIMD_4x32(0xCB503E00, 0xB19BE18F, 0x142AF544, 0xA5DF7A6E); +const SIMD_4x32 sb1t = SIMD_4x32(0xFAE22300, 0x3618D415, 0x0D2ED9EF, 0x3BF7CCC1); +const SIMD_4x32 sbou = SIMD_4x32(0x6FBDC700, 0xD0D26D17, 0xC502A878, 0x15AABF7A); +const SIMD_4x32 sbot = SIMD_4x32(0x5FBB6A00, 0xCFE474A5, 0x412B35FA, 0x8E1E90D1); + +const SIMD_4x32 sboud = SIMD_4x32(0x7EF94000, 0x1387EA53, 0xD4943E2D, 0xC7AA6DB9); +const SIMD_4x32 sbotd = SIMD_4x32(0x93441D00, 0x12D7560F, 0xD8C58E9C, 0xCA4B8159); + +const SIMD_4x32 mc_forward[4] = { + SIMD_4x32(0x00030201, 0x04070605, 0x080B0A09, 0x0C0F0E0D), + SIMD_4x32(0x04070605, 0x080B0A09, 0x0C0F0E0D, 0x00030201), + SIMD_4x32(0x080B0A09, 0x0C0F0E0D, 0x00030201, 0x04070605), + SIMD_4x32(0x0C0F0E0D, 0x00030201, 0x04070605, 0x080B0A09) +}; + +const SIMD_4x32 vperm_sr[4] = { + SIMD_4x32(0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C), + SIMD_4x32(0x0F0A0500, 0x030E0904, 0x07020D08, 0x0B06010C), + SIMD_4x32(0x0B020900, 0x0F060D04, 
0x030A0108, 0x070E050C), + SIMD_4x32(0x070A0D00, 0x0B0E0104, 0x0F020508, 0x0306090C), +}; + +const SIMD_4x32 rcon[10] = { + SIMD_4x32(0x00000070, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x0000002A, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x00000098, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x00000008, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x0000004D, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x0000007C, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x0000007D, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x00000081, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x0000001F, 0x00000000, 0x00000000, 0x00000000), + SIMD_4x32(0x00000083, 0x00000000, 0x00000000, 0x00000000), +}; + +const SIMD_4x32 sb2u = SIMD_4x32(0x0B712400, 0xE27A93C6, 0xBC982FCD, 0x5EB7E955); +const SIMD_4x32 sb2t = SIMD_4x32(0x0AE12900, 0x69EB8840, 0xAB82234A, 0xC2A163C8); + +const SIMD_4x32 k_dipt1 = SIMD_4x32(0x0B545F00, 0x0F505B04, 0x114E451A, 0x154A411E); +const SIMD_4x32 k_dipt2 = SIMD_4x32(0x60056500, 0x86E383E6, 0xF491F194, 0x12771772); + +const SIMD_4x32 sb9u = SIMD_4x32(0x9A86D600, 0x851C0353, 0x4F994CC9, 0xCAD51F50); +const SIMD_4x32 sb9t = SIMD_4x32(0xECD74900, 0xC03B1789, 0xB2FBA565, 0x725E2C9E); + +const SIMD_4x32 sbeu = SIMD_4x32(0x26D4D000, 0x46F29296, 0x64B4F6B0, 0x22426004); +const SIMD_4x32 sbet = SIMD_4x32(0xFFAAC100, 0x0C55A6CD, 0x98593E32, 0x9467F36B); + +const SIMD_4x32 sbdu = SIMD_4x32(0xE6B1A200, 0x7D57CCDF, 0x882A4439, 0xF56E9B13); +const SIMD_4x32 sbdt = SIMD_4x32(0x24C6CB00, 0x3CE2FAF7, 0x15DEEFD3, 0x2931180D); + +const SIMD_4x32 sbbu = SIMD_4x32(0x96B44200, 0xD0226492, 0xB0F2D404, 0x602646F6); +const SIMD_4x32 sbbt = SIMD_4x32(0xCD596700, 0xC19498A6, 0x3255AA6B, 0xF3FF0C3E); + +const SIMD_4x32 mcx[4] = { + SIMD_4x32(0x0C0F0E0D, 0x00030201, 0x04070605, 0x080B0A09), + SIMD_4x32(0x080B0A09, 0x0C0F0E0D, 0x00030201, 0x04070605), + SIMD_4x32(0x04070605, 0x080B0A09, 0x0C0F0E0D, 0x00030201), + SIMD_4x32(0x00030201, 0x04070605, 
0x080B0A09, 0x0C0F0E0D), +}; + +const SIMD_4x32 mc_backward[4] = { + SIMD_4x32(0x02010003, 0x06050407, 0x0A09080B, 0x0E0D0C0F), + SIMD_4x32(0x0E0D0C0F, 0x02010003, 0x06050407, 0x0A09080B), + SIMD_4x32(0x0A09080B, 0x0E0D0C0F, 0x02010003, 0x06050407), + SIMD_4x32(0x06050407, 0x0A09080B, 0x0E0D0C0F, 0x02010003), +}; + +const SIMD_4x32 lo_nibs_mask = SIMD_4x32::splat_u8(0x0F); + +inline SIMD_4x32 low_nibs(SIMD_4x32 x) + { + return lo_nibs_mask & x; + } + +inline SIMD_4x32 high_nibs(SIMD_4x32 x) + { + return (x.shr<4>() & lo_nibs_mask); + } + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_enc_first_round(SIMD_4x32 B, SIMD_4x32 K) + { + return shuffle(k_ipt1, low_nibs(B)) ^ shuffle(k_ipt2, high_nibs(B)) ^ K; + } + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_enc_round(SIMD_4x32 B, SIMD_4x32 K, size_t r) + { + const SIMD_4x32 Bh = high_nibs(B); + SIMD_4x32 Bl = low_nibs(B); + const SIMD_4x32 t2 = shuffle(k_inv2, Bl); + Bl ^= Bh; + + const SIMD_4x32 t5 = Bl ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bh)); + const SIMD_4x32 t6 = Bh ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bl)); + + const SIMD_4x32 t7 = shuffle(sb1t, t6) ^ shuffle(sb1u, t5) ^ K; + const SIMD_4x32 t8 = shuffle(sb2t, t6) ^ shuffle(sb2u, t5) ^ shuffle(t7, mc_forward[r % 4]); + + return shuffle(t8, mc_forward[r % 4]) ^ shuffle(t7, mc_backward[r % 4]) ^ t8; + } + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_enc_last_round(SIMD_4x32 B, SIMD_4x32 K, size_t r) + { + const SIMD_4x32 Bh = high_nibs(B); + SIMD_4x32 Bl = low_nibs(B); + const SIMD_4x32 t2 = shuffle(k_inv2, Bl); + Bl ^= Bh; + + const SIMD_4x32 t5 = Bl ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bh)); + const SIMD_4x32 t6 = Bh ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bl)); + + return shuffle(shuffle(sbou, t5) ^ shuffle(sbot, t6) ^ K, vperm_sr[r % 4]); + } + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_dec_first_round(SIMD_4x32 B, SIMD_4x32 K) + { + return shuffle(k_dipt1, low_nibs(B)) ^ shuffle(k_dipt2, high_nibs(B)) ^ K; + } + +inline 
SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_dec_round(SIMD_4x32 B, SIMD_4x32 K, size_t r) + { + const SIMD_4x32 Bh = high_nibs(B); + B = low_nibs(B); + const SIMD_4x32 t2 = shuffle(k_inv2, B); + + B ^= Bh; + + const SIMD_4x32 t5 = B ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bh)); + const SIMD_4x32 t6 = Bh ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, B)); + + const SIMD_4x32 mc = mcx[(r-1)%4]; + + const SIMD_4x32 t8 = shuffle(sb9t, t6) ^ shuffle(sb9u, t5) ^ K; + const SIMD_4x32 t9 = shuffle(t8, mc) ^ shuffle(sbdu, t5) ^ shuffle(sbdt, t6); + const SIMD_4x32 t12 = shuffle(t9, mc) ^ shuffle(sbbu, t5) ^ shuffle(sbbt, t6); + return shuffle(t12, mc) ^ shuffle(sbeu, t5) ^ shuffle(sbet, t6); + } + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_dec_last_round(SIMD_4x32 B, SIMD_4x32 K, size_t r) + { + const uint32_t which_sr = ((((r - 1) << 4) ^ 48) & 48) / 16; + + const SIMD_4x32 Bh = high_nibs(B); + B = low_nibs(B); + const SIMD_4x32 t2 = shuffle(k_inv2, B); + + B ^= Bh; + + const SIMD_4x32 t5 = B ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bh)); + const SIMD_4x32 t6 = Bh ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, B)); + + const SIMD_4x32 x = shuffle(sboud, t5) ^ shuffle(sbotd, t6) ^ K; + return shuffle(x, vperm_sr[which_sr]); + } + +void BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) + vperm_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks, + const SIMD_4x32 K[], size_t rounds) + { + CT::poison(in, blocks * 16); + + const size_t blocks2 = blocks - (blocks % 2); + + for(size_t i = 0; i != blocks2; i += 2) + { + SIMD_4x32 B0 = SIMD_4x32::load_le(in + i*16); + SIMD_4x32 B1 = SIMD_4x32::load_le(in + (i+1)*16); + + B0 = aes_enc_first_round(B0, K[0]); + B1 = aes_enc_first_round(B1, K[0]); + + for(size_t r = 1; r != rounds; ++r) + { + B0 = aes_enc_round(B0, K[r], r); + B1 = aes_enc_round(B1, K[r], r); + } + + B0 = aes_enc_last_round(B0, K[rounds], rounds); + B1 = aes_enc_last_round(B1, K[rounds], rounds); + + B0.store_le(out + i*16); + B1.store_le(out + (i+1)*16); + } + + for(size_t i = 
blocks2; i < blocks; ++i) + { + SIMD_4x32 B = SIMD_4x32::load_le(in + i*16); // ??? + + B = aes_enc_first_round(B, K[0]); + + for(size_t r = 1; r != rounds; ++r) + { + B = aes_enc_round(B, K[r], r); + } + + B = aes_enc_last_round(B, K[rounds], rounds); + B.store_le(out + i*16); + } + + CT::unpoison(in, blocks * 16); + CT::unpoison(out, blocks * 16); + } + +void BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) + vperm_decrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks, + const SIMD_4x32 K[], size_t rounds) + { + CT::poison(in, blocks * 16); + + const size_t blocks2 = blocks - (blocks % 2); + + for(size_t i = 0; i != blocks2; i += 2) + { + SIMD_4x32 B0 = SIMD_4x32::load_le(in + i*16); + SIMD_4x32 B1 = SIMD_4x32::load_le(in + (i+1)*16); + + B0 = aes_dec_first_round(B0, K[0]); + B1 = aes_dec_first_round(B1, K[0]); + + for(size_t r = 1; r != rounds; ++r) + { + B0 = aes_dec_round(B0, K[r], r); + B1 = aes_dec_round(B1, K[r], r); + } + + B0 = aes_dec_last_round(B0, K[rounds], rounds); + B1 = aes_dec_last_round(B1, K[rounds], rounds); + + B0.store_le(out + i*16); + B1.store_le(out + (i+1)*16); + } + + for(size_t i = blocks2; i < blocks; ++i) + { + SIMD_4x32 B = SIMD_4x32::load_le(in + i*16); // ??? 
+ + B = aes_dec_first_round(B, K[0]); + + for(size_t r = 1; r != rounds; ++r) + { + B = aes_dec_round(B, K[r], r); + } + + B = aes_dec_last_round(B, K[rounds], rounds); + B.store_le(out + i*16); + } + + CT::unpoison(in, blocks * 16); + CT::unpoison(out, blocks * 16); + } + +} + +void AES_128::vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const SIMD_4x32 K[11] = { + SIMD_4x32(&m_EK[4* 0]), SIMD_4x32(&m_EK[4* 1]), SIMD_4x32(&m_EK[4* 2]), + SIMD_4x32(&m_EK[4* 3]), SIMD_4x32(&m_EK[4* 4]), SIMD_4x32(&m_EK[4* 5]), + SIMD_4x32(&m_EK[4* 6]), SIMD_4x32(&m_EK[4* 7]), SIMD_4x32(&m_EK[4* 8]), + SIMD_4x32(&m_EK[4* 9]), SIMD_4x32(&m_EK[4*10]), + }; + + return vperm_encrypt_blocks(in, out, blocks, K, 10); + } + +void AES_128::vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const SIMD_4x32 K[11] = { + SIMD_4x32(&m_DK[4* 0]), SIMD_4x32(&m_DK[4* 1]), SIMD_4x32(&m_DK[4* 2]), + SIMD_4x32(&m_DK[4* 3]), SIMD_4x32(&m_DK[4* 4]), SIMD_4x32(&m_DK[4* 5]), + SIMD_4x32(&m_DK[4* 6]), SIMD_4x32(&m_DK[4* 7]), SIMD_4x32(&m_DK[4* 8]), + SIMD_4x32(&m_DK[4* 9]), SIMD_4x32(&m_DK[4*10]), + }; + + return vperm_decrypt_blocks(in, out, blocks, K, 10); + } + +void AES_192::vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const SIMD_4x32 K[13] = { + SIMD_4x32(&m_EK[4* 0]), SIMD_4x32(&m_EK[4* 1]), SIMD_4x32(&m_EK[4* 2]), + SIMD_4x32(&m_EK[4* 3]), SIMD_4x32(&m_EK[4* 4]), SIMD_4x32(&m_EK[4* 5]), + SIMD_4x32(&m_EK[4* 6]), SIMD_4x32(&m_EK[4* 7]), SIMD_4x32(&m_EK[4* 8]), + SIMD_4x32(&m_EK[4* 9]), SIMD_4x32(&m_EK[4*10]), SIMD_4x32(&m_EK[4*11]), + SIMD_4x32(&m_EK[4*12]), + }; + + return vperm_encrypt_blocks(in, out, blocks, K, 12); + } + +void AES_192::vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const SIMD_4x32 K[13] = { + SIMD_4x32(&m_DK[4* 0]), SIMD_4x32(&m_DK[4* 1]), SIMD_4x32(&m_DK[4* 2]), + SIMD_4x32(&m_DK[4* 3]), SIMD_4x32(&m_DK[4* 4]), SIMD_4x32(&m_DK[4* 5]), + SIMD_4x32(&m_DK[4* 6]), 
SIMD_4x32(&m_DK[4* 7]), SIMD_4x32(&m_DK[4* 8]), + SIMD_4x32(&m_DK[4* 9]), SIMD_4x32(&m_DK[4*10]), SIMD_4x32(&m_DK[4*11]), + SIMD_4x32(&m_DK[4*12]), + }; + + return vperm_decrypt_blocks(in, out, blocks, K, 12); + } + +void AES_256::vperm_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const SIMD_4x32 K[15] = { + SIMD_4x32(&m_EK[4* 0]), SIMD_4x32(&m_EK[4* 1]), SIMD_4x32(&m_EK[4* 2]), + SIMD_4x32(&m_EK[4* 3]), SIMD_4x32(&m_EK[4* 4]), SIMD_4x32(&m_EK[4* 5]), + SIMD_4x32(&m_EK[4* 6]), SIMD_4x32(&m_EK[4* 7]), SIMD_4x32(&m_EK[4* 8]), + SIMD_4x32(&m_EK[4* 9]), SIMD_4x32(&m_EK[4*10]), SIMD_4x32(&m_EK[4*11]), + SIMD_4x32(&m_EK[4*12]), SIMD_4x32(&m_EK[4*13]), SIMD_4x32(&m_EK[4*14]), + }; + + return vperm_encrypt_blocks(in, out, blocks, K, 14); + } + +void AES_256::vperm_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const SIMD_4x32 K[15] = { + SIMD_4x32(&m_DK[4* 0]), SIMD_4x32(&m_DK[4* 1]), SIMD_4x32(&m_DK[4* 2]), + SIMD_4x32(&m_DK[4* 3]), SIMD_4x32(&m_DK[4* 4]), SIMD_4x32(&m_DK[4* 5]), + SIMD_4x32(&m_DK[4* 6]), SIMD_4x32(&m_DK[4* 7]), SIMD_4x32(&m_DK[4* 8]), + SIMD_4x32(&m_DK[4* 9]), SIMD_4x32(&m_DK[4*10]), SIMD_4x32(&m_DK[4*11]), + SIMD_4x32(&m_DK[4*12]), SIMD_4x32(&m_DK[4*13]), SIMD_4x32(&m_DK[4*14]), + }; + + return vperm_decrypt_blocks(in, out, blocks, K, 14); + } + +namespace { + +inline SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) + aes_schedule_transform(SIMD_4x32 input, + SIMD_4x32 table_1, + SIMD_4x32 table_2) + { + return shuffle(table_1, low_nibs(input)) ^ shuffle(table_2, high_nibs(input)); + } + +SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_mangle(SIMD_4x32 k, uint8_t round_no) + { + const SIMD_4x32 mc_forward0(0x00030201, 0x04070605, 0x080B0A09, 0x0C0F0E0D); + + SIMD_4x32 t = shuffle(k ^ SIMD_4x32::splat_u8(0x5B), mc_forward0); + SIMD_4x32 t2 = t; + t = shuffle(t, mc_forward0); + t2 = t ^ t2 ^ shuffle(t, mc_forward0); + return shuffle(t2, vperm_sr[round_no % 4]); + } + +SIMD_4x32 
BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_mangle_dec(SIMD_4x32 k, uint8_t round_no) + { + const SIMD_4x32 mc_forward0(0x00030201, 0x04070605, 0x080B0A09, 0x0C0F0E0D); + + const SIMD_4x32 dsk[8] = { + SIMD_4x32(0x7ED9A700, 0xB6116FC8, 0x82255BFC, 0x4AED9334), + SIMD_4x32(0x27143300, 0x45765162, 0xE9DAFDCE, 0x8BB89FAC), + SIMD_4x32(0xCCA86400, 0x27438FEB, 0xADC90561, 0x4622EE8A), + SIMD_4x32(0x4F92DD00, 0x815C13CE, 0xBD602FF2, 0x73AEE13C), + SIMD_4x32(0x01C6C700, 0x03C4C502, 0xFA3D3CFB, 0xF83F3EF9), + SIMD_4x32(0x38CFF700, 0xEE1921D6, 0x7384BC4B, 0xA5526A9D), + SIMD_4x32(0x53732000, 0xE3C390B0, 0x10306343, 0xA080D3F3), + SIMD_4x32(0x036982E8, 0xA0CA214B, 0x8CE60D67, 0x2F45AEC4), + }; + + SIMD_4x32 t = aes_schedule_transform(k, dsk[0], dsk[1]); + SIMD_4x32 output = shuffle(t, mc_forward0); + + t = aes_schedule_transform(t, dsk[2], dsk[3]); + output = shuffle(t ^ output, mc_forward0); + + t = aes_schedule_transform(t, dsk[4], dsk[5]); + output = shuffle(t ^ output, mc_forward0); + + t = aes_schedule_transform(t, dsk[6], dsk[7]); + output = shuffle(t ^ output, mc_forward0); + + return shuffle(output, vperm_sr[round_no % 4]); + } + +SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_mangle_last(SIMD_4x32 k, uint8_t round_no) + { + const SIMD_4x32 out_tr1(0xD6B66000, 0xFF9F4929, 0xDEBE6808, 0xF7974121); + const SIMD_4x32 out_tr2(0x50BCEC00, 0x01EDBD51, 0xB05C0CE0, 0xE10D5DB1); + + k = shuffle(k, vperm_sr[round_no % 4]); + k ^= SIMD_4x32::splat_u8(0x5B); + return aes_schedule_transform(k, out_tr1, out_tr2); + } + +SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_mangle_last_dec(SIMD_4x32 k) + { + const SIMD_4x32 deskew1(0x47A4E300, 0x07E4A340, 0x5DBEF91A, 0x1DFEB95A); + const SIMD_4x32 deskew2(0x83EA6900, 0x5F36B5DC, 0xF49D1E77, 0x2841C2AB); + + k ^= SIMD_4x32::splat_u8(0x5B); + return aes_schedule_transform(k, deskew1, deskew2); + } + +SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_round(SIMD_4x32 input1, SIMD_4x32 input2) + { + SIMD_4x32 smeared = 
input2 ^ input2.shift_elems_left<1>(); + smeared ^= smeared.shift_elems_left<2>(); + smeared ^= SIMD_4x32::splat_u8(0x5B); + + const SIMD_4x32 Bh = high_nibs(input1); + SIMD_4x32 Bl = low_nibs(input1); + + const SIMD_4x32 t2 = shuffle(k_inv2, Bl); + + Bl ^= Bh; + + SIMD_4x32 t5 = Bl ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bh)); + SIMD_4x32 t6 = Bh ^ shuffle(k_inv1, t2 ^ shuffle(k_inv1, Bl)); + + return smeared ^ shuffle(sb1u, t5) ^ shuffle(sb1t, t6); + } + +SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_round(SIMD_4x32 rc, SIMD_4x32 input1, SIMD_4x32 input2) + { + // This byte shuffle is equivalent to alignr<1>(shuffle32(input1, (3,3,3,3))); + const SIMD_4x32 shuffle3333_15 = SIMD_4x32::splat(0x0C0F0E0D); + return aes_schedule_round(shuffle(input1, shuffle3333_15), input2 ^ rc); + } + +SIMD_4x32 BOTAN_FUNC_ISA(BOTAN_VPERM_ISA) aes_schedule_192_smear(SIMD_4x32 x, SIMD_4x32 y) + { + const SIMD_4x32 shuffle3332 = + SIMD_4x32(0x0B0A0908, 0x0F0E0D0C, 0x0F0E0D0C, 0x0F0E0D0C); + const SIMD_4x32 shuffle2000 = + SIMD_4x32(0x03020100, 0x03020100, 0x03020100, 0x0B0A0908); + + const SIMD_4x32 zero_top_half(0, 0, 0xFFFFFFFF, 0xFFFFFFFF); + y &= zero_top_half; + return y ^ shuffle(x, shuffle3332) ^ shuffle(y, shuffle2000); + } + +} + +void AES_128::vperm_key_schedule(const uint8_t keyb[], size_t) + { + m_EK.resize(11*4); + m_DK.resize(11*4); + + SIMD_4x32 key = SIMD_4x32::load_le(keyb); + + shuffle(key, vperm_sr[2]).store_le(&m_DK[4*10]); + + key = aes_schedule_transform(key, k_ipt1, k_ipt2); + key.store_le(&m_EK[0]); + + for(size_t i = 1; i != 10; ++i) + { + key = aes_schedule_round(rcon[i-1], key, key); + + aes_schedule_mangle(key, (12-i) % 4).store_le(&m_EK[4*i]); + + aes_schedule_mangle_dec(key, (10-i)%4).store_le(&m_DK[4*(10-i)]); + } + + key = aes_schedule_round(rcon[9], key, key); + aes_schedule_mangle_last(key, 2).store_le(&m_EK[4*10]); + aes_schedule_mangle_last_dec(key).store_le(&m_DK[0]); + } + +void AES_192::vperm_key_schedule(const uint8_t keyb[], size_t) + { 
+ m_EK.resize(13*4); + m_DK.resize(13*4); + + SIMD_4x32 key1 = SIMD_4x32::load_le(keyb); + SIMD_4x32 key2 = SIMD_4x32::load_le(keyb + 8); + + shuffle(key1, vperm_sr[0]).store_le(&m_DK[12*4]); + + key1 = aes_schedule_transform(key1, k_ipt1, k_ipt2); + key2 = aes_schedule_transform(key2, k_ipt1, k_ipt2); + + key1.store_le(&m_EK[0]); + + for(size_t i = 0; i != 4; ++i) + { + // key2 with 8 high bytes masked off + SIMD_4x32 t = key2; + key2 = aes_schedule_round(rcon[2*i], key2, key1); + const SIMD_4x32 key2t = alignr8(key2, t); + aes_schedule_mangle(key2t, (i+3)%4).store_le(&m_EK[4*(3*i+1)]); + aes_schedule_mangle_dec(key2t, (i+3)%4).store_le(&m_DK[4*(11-3*i)]); + + t = aes_schedule_192_smear(key2, t); + + aes_schedule_mangle(t, (i+2)%4).store_le(&m_EK[4*(3*i+2)]); + aes_schedule_mangle_dec(t, (i+2)%4).store_le(&m_DK[4*(10-3*i)]); + + key2 = aes_schedule_round(rcon[2*i+1], t, key2); + + if(i == 3) + { + aes_schedule_mangle_last(key2, (i+1)%4).store_le(&m_EK[4*(3*i+3)]); + aes_schedule_mangle_last_dec(key2).store_le(&m_DK[4*(9-3*i)]); + } + else + { + aes_schedule_mangle(key2, (i+1)%4).store_le(&m_EK[4*(3*i+3)]); + aes_schedule_mangle_dec(key2, (i+1)%4).store_le(&m_DK[4*(9-3*i)]); + } + + key1 = key2; + key2 = aes_schedule_192_smear(key2, t); + } + } + +void AES_256::vperm_key_schedule(const uint8_t keyb[], size_t) + { + m_EK.resize(15*4); + m_DK.resize(15*4); + + SIMD_4x32 key1 = SIMD_4x32::load_le(keyb); + SIMD_4x32 key2 = SIMD_4x32::load_le(keyb + 16); + + shuffle(key1, vperm_sr[2]).store_le(&m_DK[4*14]); + + key1 = aes_schedule_transform(key1, k_ipt1, k_ipt2); + key2 = aes_schedule_transform(key2, k_ipt1, k_ipt2); + + key1.store_le(&m_EK[0]); + aes_schedule_mangle(key2, 3).store_le(&m_EK[4]); + + aes_schedule_mangle_dec(key2, 1).store_le(&m_DK[4*13]); + + const SIMD_4x32 shuffle3333 = SIMD_4x32::splat(0x0F0E0D0C); + + for(size_t i = 2; i != 14; i += 2) + { + const SIMD_4x32 k_t = key2; + key1 = key2 = aes_schedule_round(rcon[(i/2)-1], key2, key1); + + 
aes_schedule_mangle(key2, i % 4).store_le(&m_EK[4*i]); + aes_schedule_mangle_dec(key2, (i+2)%4).store_le(&m_DK[4*(14-i)]); + + key2 = aes_schedule_round(shuffle(key2, shuffle3333), k_t); + + aes_schedule_mangle(key2, (i-1)%4).store_le(&m_EK[4*(i+1)]); + aes_schedule_mangle_dec(key2, (i+1)%4).store_le(&m_DK[4*(13-i)]); + } + + key2 = aes_schedule_round(rcon[6], key2, key1); + + aes_schedule_mangle_last(key2, 2).store_le(&m_EK[4*14]); + aes_schedule_mangle_last_dec(key2).store_le(&m_DK[0]); + } + +} diff --git a/comm/third_party/botan/src/lib/block/aes/aes_vperm/info.txt b/comm/third_party/botan/src/lib/block/aes/aes_vperm/info.txt new file mode 100644 index 0000000000..0b7eabaace --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/aes_vperm/info.txt @@ -0,0 +1,36 @@ +<defines> +AES_VPERM -> 20190901 +</defines> + +endian little + +<isa> +x86_32:sse2 +x86_64:sse2 +x86_32:ssse3 +x86_64:ssse3 +arm32:neon +arm64:neon +ppc32:altivec +ppc64:altivec +</isa> + +<arch> +x86_32 +x86_64 +arm32 +arm64 +ppc32 +ppc64 +</arch> + +<requires> +simd +</requires> + +<cc> +gcc +clang +msvc:19.10 # VC 2017 +sunstudio +</cc> diff --git a/comm/third_party/botan/src/lib/block/aes/info.txt b/comm/third_party/botan/src/lib/block/aes/info.txt new file mode 100644 index 0000000000..62455cf2c3 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aes/info.txt @@ -0,0 +1,3 @@ +<defines> +AES -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/aria/aria.cpp b/comm/third_party/botan/src/lib/block/aria/aria.cpp new file mode 100644 index 0000000000..79105a88c4 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aria/aria.cpp @@ -0,0 +1,506 @@ +/* +* ARIA +* Adapted for Botan by Jeffrey Walton, public domain +* +* Further changes +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +* +* This ARIA implementation is based on the 32-bit implementation by Aaram Yun from the +* National Security Research Institute, 
KOREA. Aaram Yun's implementation is based on +* the 8-bit implementation by Jin Hong. The source files are available in ARIA.zip from +* the Korea Internet & Security Agency website. +* <A HREF="https://tools.ietf.org/html/rfc5794">RFC 5794, A Description of the ARIA Encryption Algorithm</A>, +* <A HREF="http://seed.kisa.or.kr/iwt/ko/bbs/EgovReferenceList.do?bbsId=BBSMSTR_000000000002">Korea +* Internet & Security Agency homepage</A> +*/ + +#include <botan/aria.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/cpuid.h> + +namespace Botan { + +namespace { + +namespace ARIA_F { + +alignas(64) +const uint32_t S1[256]={ + 0x00636363,0x007c7c7c,0x00777777,0x007b7b7b,0x00f2f2f2,0x006b6b6b,0x006f6f6f,0x00c5c5c5, + 0x00303030,0x00010101,0x00676767,0x002b2b2b,0x00fefefe,0x00d7d7d7,0x00ababab,0x00767676, + 0x00cacaca,0x00828282,0x00c9c9c9,0x007d7d7d,0x00fafafa,0x00595959,0x00474747,0x00f0f0f0, + 0x00adadad,0x00d4d4d4,0x00a2a2a2,0x00afafaf,0x009c9c9c,0x00a4a4a4,0x00727272,0x00c0c0c0, + 0x00b7b7b7,0x00fdfdfd,0x00939393,0x00262626,0x00363636,0x003f3f3f,0x00f7f7f7,0x00cccccc, + 0x00343434,0x00a5a5a5,0x00e5e5e5,0x00f1f1f1,0x00717171,0x00d8d8d8,0x00313131,0x00151515, + 0x00040404,0x00c7c7c7,0x00232323,0x00c3c3c3,0x00181818,0x00969696,0x00050505,0x009a9a9a, + 0x00070707,0x00121212,0x00808080,0x00e2e2e2,0x00ebebeb,0x00272727,0x00b2b2b2,0x00757575, + 0x00090909,0x00838383,0x002c2c2c,0x001a1a1a,0x001b1b1b,0x006e6e6e,0x005a5a5a,0x00a0a0a0, + 0x00525252,0x003b3b3b,0x00d6d6d6,0x00b3b3b3,0x00292929,0x00e3e3e3,0x002f2f2f,0x00848484, + 0x00535353,0x00d1d1d1,0x00000000,0x00ededed,0x00202020,0x00fcfcfc,0x00b1b1b1,0x005b5b5b, + 0x006a6a6a,0x00cbcbcb,0x00bebebe,0x00393939,0x004a4a4a,0x004c4c4c,0x00585858,0x00cfcfcf, + 0x00d0d0d0,0x00efefef,0x00aaaaaa,0x00fbfbfb,0x00434343,0x004d4d4d,0x00333333,0x00858585, + 0x00454545,0x00f9f9f9,0x00020202,0x007f7f7f,0x00505050,0x003c3c3c,0x009f9f9f,0x00a8a8a8, + 
0x00515151,0x00a3a3a3,0x00404040,0x008f8f8f,0x00929292,0x009d9d9d,0x00383838,0x00f5f5f5, + 0x00bcbcbc,0x00b6b6b6,0x00dadada,0x00212121,0x00101010,0x00ffffff,0x00f3f3f3,0x00d2d2d2, + 0x00cdcdcd,0x000c0c0c,0x00131313,0x00ececec,0x005f5f5f,0x00979797,0x00444444,0x00171717, + 0x00c4c4c4,0x00a7a7a7,0x007e7e7e,0x003d3d3d,0x00646464,0x005d5d5d,0x00191919,0x00737373, + 0x00606060,0x00818181,0x004f4f4f,0x00dcdcdc,0x00222222,0x002a2a2a,0x00909090,0x00888888, + 0x00464646,0x00eeeeee,0x00b8b8b8,0x00141414,0x00dedede,0x005e5e5e,0x000b0b0b,0x00dbdbdb, + 0x00e0e0e0,0x00323232,0x003a3a3a,0x000a0a0a,0x00494949,0x00060606,0x00242424,0x005c5c5c, + 0x00c2c2c2,0x00d3d3d3,0x00acacac,0x00626262,0x00919191,0x00959595,0x00e4e4e4,0x00797979, + 0x00e7e7e7,0x00c8c8c8,0x00373737,0x006d6d6d,0x008d8d8d,0x00d5d5d5,0x004e4e4e,0x00a9a9a9, + 0x006c6c6c,0x00565656,0x00f4f4f4,0x00eaeaea,0x00656565,0x007a7a7a,0x00aeaeae,0x00080808, + 0x00bababa,0x00787878,0x00252525,0x002e2e2e,0x001c1c1c,0x00a6a6a6,0x00b4b4b4,0x00c6c6c6, + 0x00e8e8e8,0x00dddddd,0x00747474,0x001f1f1f,0x004b4b4b,0x00bdbdbd,0x008b8b8b,0x008a8a8a, + 0x00707070,0x003e3e3e,0x00b5b5b5,0x00666666,0x00484848,0x00030303,0x00f6f6f6,0x000e0e0e, + 0x00616161,0x00353535,0x00575757,0x00b9b9b9,0x00868686,0x00c1c1c1,0x001d1d1d,0x009e9e9e, + 0x00e1e1e1,0x00f8f8f8,0x00989898,0x00111111,0x00696969,0x00d9d9d9,0x008e8e8e,0x00949494, + 0x009b9b9b,0x001e1e1e,0x00878787,0x00e9e9e9,0x00cecece,0x00555555,0x00282828,0x00dfdfdf, + 0x008c8c8c,0x00a1a1a1,0x00898989,0x000d0d0d,0x00bfbfbf,0x00e6e6e6,0x00424242,0x00686868, + 0x00414141,0x00999999,0x002d2d2d,0x000f0f0f,0x00b0b0b0,0x00545454,0x00bbbbbb,0x00161616 +}; + +alignas(64) +const uint32_t S2[256]={ + 0xe200e2e2,0x4e004e4e,0x54005454,0xfc00fcfc,0x94009494,0xc200c2c2,0x4a004a4a,0xcc00cccc, + 0x62006262,0x0d000d0d,0x6a006a6a,0x46004646,0x3c003c3c,0x4d004d4d,0x8b008b8b,0xd100d1d1, + 0x5e005e5e,0xfa00fafa,0x64006464,0xcb00cbcb,0xb400b4b4,0x97009797,0xbe00bebe,0x2b002b2b, + 
0xbc00bcbc,0x77007777,0x2e002e2e,0x03000303,0xd300d3d3,0x19001919,0x59005959,0xc100c1c1, + 0x1d001d1d,0x06000606,0x41004141,0x6b006b6b,0x55005555,0xf000f0f0,0x99009999,0x69006969, + 0xea00eaea,0x9c009c9c,0x18001818,0xae00aeae,0x63006363,0xdf00dfdf,0xe700e7e7,0xbb00bbbb, + 0x00000000,0x73007373,0x66006666,0xfb00fbfb,0x96009696,0x4c004c4c,0x85008585,0xe400e4e4, + 0x3a003a3a,0x09000909,0x45004545,0xaa00aaaa,0x0f000f0f,0xee00eeee,0x10001010,0xeb00ebeb, + 0x2d002d2d,0x7f007f7f,0xf400f4f4,0x29002929,0xac00acac,0xcf00cfcf,0xad00adad,0x91009191, + 0x8d008d8d,0x78007878,0xc800c8c8,0x95009595,0xf900f9f9,0x2f002f2f,0xce00cece,0xcd00cdcd, + 0x08000808,0x7a007a7a,0x88008888,0x38003838,0x5c005c5c,0x83008383,0x2a002a2a,0x28002828, + 0x47004747,0xdb00dbdb,0xb800b8b8,0xc700c7c7,0x93009393,0xa400a4a4,0x12001212,0x53005353, + 0xff00ffff,0x87008787,0x0e000e0e,0x31003131,0x36003636,0x21002121,0x58005858,0x48004848, + 0x01000101,0x8e008e8e,0x37003737,0x74007474,0x32003232,0xca00caca,0xe900e9e9,0xb100b1b1, + 0xb700b7b7,0xab00abab,0x0c000c0c,0xd700d7d7,0xc400c4c4,0x56005656,0x42004242,0x26002626, + 0x07000707,0x98009898,0x60006060,0xd900d9d9,0xb600b6b6,0xb900b9b9,0x11001111,0x40004040, + 0xec00ecec,0x20002020,0x8c008c8c,0xbd00bdbd,0xa000a0a0,0xc900c9c9,0x84008484,0x04000404, + 0x49004949,0x23002323,0xf100f1f1,0x4f004f4f,0x50005050,0x1f001f1f,0x13001313,0xdc00dcdc, + 0xd800d8d8,0xc000c0c0,0x9e009e9e,0x57005757,0xe300e3e3,0xc300c3c3,0x7b007b7b,0x65006565, + 0x3b003b3b,0x02000202,0x8f008f8f,0x3e003e3e,0xe800e8e8,0x25002525,0x92009292,0xe500e5e5, + 0x15001515,0xdd00dddd,0xfd00fdfd,0x17001717,0xa900a9a9,0xbf00bfbf,0xd400d4d4,0x9a009a9a, + 0x7e007e7e,0xc500c5c5,0x39003939,0x67006767,0xfe00fefe,0x76007676,0x9d009d9d,0x43004343, + 0xa700a7a7,0xe100e1e1,0xd000d0d0,0xf500f5f5,0x68006868,0xf200f2f2,0x1b001b1b,0x34003434, + 0x70007070,0x05000505,0xa300a3a3,0x8a008a8a,0xd500d5d5,0x79007979,0x86008686,0xa800a8a8, + 0x30003030,0xc600c6c6,0x51005151,0x4b004b4b,0x1e001e1e,0xa600a6a6,0x27002727,0xf600f6f6, 
+ 0x35003535,0xd200d2d2,0x6e006e6e,0x24002424,0x16001616,0x82008282,0x5f005f5f,0xda00dada, + 0xe600e6e6,0x75007575,0xa200a2a2,0xef00efef,0x2c002c2c,0xb200b2b2,0x1c001c1c,0x9f009f9f, + 0x5d005d5d,0x6f006f6f,0x80008080,0x0a000a0a,0x72007272,0x44004444,0x9b009b9b,0x6c006c6c, + 0x90009090,0x0b000b0b,0x5b005b5b,0x33003333,0x7d007d7d,0x5a005a5a,0x52005252,0xf300f3f3, + 0x61006161,0xa100a1a1,0xf700f7f7,0xb000b0b0,0xd600d6d6,0x3f003f3f,0x7c007c7c,0x6d006d6d, + 0xed00eded,0x14001414,0xe000e0e0,0xa500a5a5,0x3d003d3d,0x22002222,0xb300b3b3,0xf800f8f8, + 0x89008989,0xde00dede,0x71007171,0x1a001a1a,0xaf00afaf,0xba00baba,0xb500b5b5,0x81008181 +}; + +alignas(64) +const uint32_t X1[256]={ + 0x52520052,0x09090009,0x6a6a006a,0xd5d500d5,0x30300030,0x36360036,0xa5a500a5,0x38380038, + 0xbfbf00bf,0x40400040,0xa3a300a3,0x9e9e009e,0x81810081,0xf3f300f3,0xd7d700d7,0xfbfb00fb, + 0x7c7c007c,0xe3e300e3,0x39390039,0x82820082,0x9b9b009b,0x2f2f002f,0xffff00ff,0x87870087, + 0x34340034,0x8e8e008e,0x43430043,0x44440044,0xc4c400c4,0xdede00de,0xe9e900e9,0xcbcb00cb, + 0x54540054,0x7b7b007b,0x94940094,0x32320032,0xa6a600a6,0xc2c200c2,0x23230023,0x3d3d003d, + 0xeeee00ee,0x4c4c004c,0x95950095,0x0b0b000b,0x42420042,0xfafa00fa,0xc3c300c3,0x4e4e004e, + 0x08080008,0x2e2e002e,0xa1a100a1,0x66660066,0x28280028,0xd9d900d9,0x24240024,0xb2b200b2, + 0x76760076,0x5b5b005b,0xa2a200a2,0x49490049,0x6d6d006d,0x8b8b008b,0xd1d100d1,0x25250025, + 0x72720072,0xf8f800f8,0xf6f600f6,0x64640064,0x86860086,0x68680068,0x98980098,0x16160016, + 0xd4d400d4,0xa4a400a4,0x5c5c005c,0xcccc00cc,0x5d5d005d,0x65650065,0xb6b600b6,0x92920092, + 0x6c6c006c,0x70700070,0x48480048,0x50500050,0xfdfd00fd,0xeded00ed,0xb9b900b9,0xdada00da, + 0x5e5e005e,0x15150015,0x46460046,0x57570057,0xa7a700a7,0x8d8d008d,0x9d9d009d,0x84840084, + 0x90900090,0xd8d800d8,0xabab00ab,0x00000000,0x8c8c008c,0xbcbc00bc,0xd3d300d3,0x0a0a000a, + 0xf7f700f7,0xe4e400e4,0x58580058,0x05050005,0xb8b800b8,0xb3b300b3,0x45450045,0x06060006, + 
0xd0d000d0,0x2c2c002c,0x1e1e001e,0x8f8f008f,0xcaca00ca,0x3f3f003f,0x0f0f000f,0x02020002, + 0xc1c100c1,0xafaf00af,0xbdbd00bd,0x03030003,0x01010001,0x13130013,0x8a8a008a,0x6b6b006b, + 0x3a3a003a,0x91910091,0x11110011,0x41410041,0x4f4f004f,0x67670067,0xdcdc00dc,0xeaea00ea, + 0x97970097,0xf2f200f2,0xcfcf00cf,0xcece00ce,0xf0f000f0,0xb4b400b4,0xe6e600e6,0x73730073, + 0x96960096,0xacac00ac,0x74740074,0x22220022,0xe7e700e7,0xadad00ad,0x35350035,0x85850085, + 0xe2e200e2,0xf9f900f9,0x37370037,0xe8e800e8,0x1c1c001c,0x75750075,0xdfdf00df,0x6e6e006e, + 0x47470047,0xf1f100f1,0x1a1a001a,0x71710071,0x1d1d001d,0x29290029,0xc5c500c5,0x89890089, + 0x6f6f006f,0xb7b700b7,0x62620062,0x0e0e000e,0xaaaa00aa,0x18180018,0xbebe00be,0x1b1b001b, + 0xfcfc00fc,0x56560056,0x3e3e003e,0x4b4b004b,0xc6c600c6,0xd2d200d2,0x79790079,0x20200020, + 0x9a9a009a,0xdbdb00db,0xc0c000c0,0xfefe00fe,0x78780078,0xcdcd00cd,0x5a5a005a,0xf4f400f4, + 0x1f1f001f,0xdddd00dd,0xa8a800a8,0x33330033,0x88880088,0x07070007,0xc7c700c7,0x31310031, + 0xb1b100b1,0x12120012,0x10100010,0x59590059,0x27270027,0x80800080,0xecec00ec,0x5f5f005f, + 0x60600060,0x51510051,0x7f7f007f,0xa9a900a9,0x19190019,0xb5b500b5,0x4a4a004a,0x0d0d000d, + 0x2d2d002d,0xe5e500e5,0x7a7a007a,0x9f9f009f,0x93930093,0xc9c900c9,0x9c9c009c,0xefef00ef, + 0xa0a000a0,0xe0e000e0,0x3b3b003b,0x4d4d004d,0xaeae00ae,0x2a2a002a,0xf5f500f5,0xb0b000b0, + 0xc8c800c8,0xebeb00eb,0xbbbb00bb,0x3c3c003c,0x83830083,0x53530053,0x99990099,0x61610061, + 0x17170017,0x2b2b002b,0x04040004,0x7e7e007e,0xbaba00ba,0x77770077,0xd6d600d6,0x26260026, + 0xe1e100e1,0x69690069,0x14140014,0x63630063,0x55550055,0x21210021,0x0c0c000c,0x7d7d007d +}; + +alignas(64) +const uint32_t X2[256]={ + 0x30303000,0x68686800,0x99999900,0x1b1b1b00,0x87878700,0xb9b9b900,0x21212100,0x78787800, + 0x50505000,0x39393900,0xdbdbdb00,0xe1e1e100,0x72727200,0x09090900,0x62626200,0x3c3c3c00, + 0x3e3e3e00,0x7e7e7e00,0x5e5e5e00,0x8e8e8e00,0xf1f1f100,0xa0a0a000,0xcccccc00,0xa3a3a300, + 
0x2a2a2a00,0x1d1d1d00,0xfbfbfb00,0xb6b6b600,0xd6d6d600,0x20202000,0xc4c4c400,0x8d8d8d00, + 0x81818100,0x65656500,0xf5f5f500,0x89898900,0xcbcbcb00,0x9d9d9d00,0x77777700,0xc6c6c600, + 0x57575700,0x43434300,0x56565600,0x17171700,0xd4d4d400,0x40404000,0x1a1a1a00,0x4d4d4d00, + 0xc0c0c000,0x63636300,0x6c6c6c00,0xe3e3e300,0xb7b7b700,0xc8c8c800,0x64646400,0x6a6a6a00, + 0x53535300,0xaaaaaa00,0x38383800,0x98989800,0x0c0c0c00,0xf4f4f400,0x9b9b9b00,0xededed00, + 0x7f7f7f00,0x22222200,0x76767600,0xafafaf00,0xdddddd00,0x3a3a3a00,0x0b0b0b00,0x58585800, + 0x67676700,0x88888800,0x06060600,0xc3c3c300,0x35353500,0x0d0d0d00,0x01010100,0x8b8b8b00, + 0x8c8c8c00,0xc2c2c200,0xe6e6e600,0x5f5f5f00,0x02020200,0x24242400,0x75757500,0x93939300, + 0x66666600,0x1e1e1e00,0xe5e5e500,0xe2e2e200,0x54545400,0xd8d8d800,0x10101000,0xcecece00, + 0x7a7a7a00,0xe8e8e800,0x08080800,0x2c2c2c00,0x12121200,0x97979700,0x32323200,0xababab00, + 0xb4b4b400,0x27272700,0x0a0a0a00,0x23232300,0xdfdfdf00,0xefefef00,0xcacaca00,0xd9d9d900, + 0xb8b8b800,0xfafafa00,0xdcdcdc00,0x31313100,0x6b6b6b00,0xd1d1d100,0xadadad00,0x19191900, + 0x49494900,0xbdbdbd00,0x51515100,0x96969600,0xeeeeee00,0xe4e4e400,0xa8a8a800,0x41414100, + 0xdadada00,0xffffff00,0xcdcdcd00,0x55555500,0x86868600,0x36363600,0xbebebe00,0x61616100, + 0x52525200,0xf8f8f800,0xbbbbbb00,0x0e0e0e00,0x82828200,0x48484800,0x69696900,0x9a9a9a00, + 0xe0e0e000,0x47474700,0x9e9e9e00,0x5c5c5c00,0x04040400,0x4b4b4b00,0x34343400,0x15151500, + 0x79797900,0x26262600,0xa7a7a700,0xdedede00,0x29292900,0xaeaeae00,0x92929200,0xd7d7d700, + 0x84848400,0xe9e9e900,0xd2d2d200,0xbababa00,0x5d5d5d00,0xf3f3f300,0xc5c5c500,0xb0b0b000, + 0xbfbfbf00,0xa4a4a400,0x3b3b3b00,0x71717100,0x44444400,0x46464600,0x2b2b2b00,0xfcfcfc00, + 0xebebeb00,0x6f6f6f00,0xd5d5d500,0xf6f6f600,0x14141400,0xfefefe00,0x7c7c7c00,0x70707000, + 0x5a5a5a00,0x7d7d7d00,0xfdfdfd00,0x2f2f2f00,0x18181800,0x83838300,0x16161600,0xa5a5a500, + 0x91919100,0x1f1f1f00,0x05050500,0x95959500,0x74747400,0xa9a9a900,0xc1c1c100,0x5b5b5b00, 
+ 0x4a4a4a00,0x85858500,0x6d6d6d00,0x13131300,0x07070700,0x4f4f4f00,0x4e4e4e00,0x45454500, + 0xb2b2b200,0x0f0f0f00,0xc9c9c900,0x1c1c1c00,0xa6a6a600,0xbcbcbc00,0xececec00,0x73737300, + 0x90909000,0x7b7b7b00,0xcfcfcf00,0x59595900,0x8f8f8f00,0xa1a1a100,0xf9f9f900,0x2d2d2d00, + 0xf2f2f200,0xb1b1b100,0x00000000,0x94949400,0x37373700,0x9f9f9f00,0xd0d0d000,0x2e2e2e00, + 0x9c9c9c00,0x6e6e6e00,0x28282800,0x3f3f3f00,0x80808000,0xf0f0f000,0x3d3d3d00,0xd3d3d300, + 0x25252500,0x8a8a8a00,0xb5b5b500,0xe7e7e700,0x42424200,0xb3b3b300,0xc7c7c700,0xeaeaea00, + 0xf7f7f700,0x4c4c4c00,0x11111100,0x33333300,0x03030300,0xa2a2a200,0xacacac00,0x60606000 +}; + +inline void ARIA_FO(uint32_t& T0, uint32_t& T1, uint32_t& T2, uint32_t& T3) + { + T0 = S1[get_byte(0,T0)] ^ S2[get_byte(1,T0)] ^ X1[get_byte(2,T0)] ^ X2[get_byte(3,T0)]; + T1 = S1[get_byte(0,T1)] ^ S2[get_byte(1,T1)] ^ X1[get_byte(2,T1)] ^ X2[get_byte(3,T1)]; + T2 = S1[get_byte(0,T2)] ^ S2[get_byte(1,T2)] ^ X1[get_byte(2,T2)] ^ X2[get_byte(3,T2)]; + T3 = S1[get_byte(0,T3)] ^ S2[get_byte(1,T3)] ^ X1[get_byte(2,T3)] ^ X2[get_byte(3,T3)]; + + T1 ^= T2; + T2 ^= T3; T0 ^= T1; + T3 ^= T1; T2 ^= T0; + T1 ^= T2; + + T1 = ((T1 << 8) & 0xFF00FF00) | ((T1 >> 8) & 0x00FF00FF); + T2 = rotr<16>(T2); + T3 = reverse_bytes(T3); + + T1 ^= T2; + T2 ^= T3; T0 ^= T1; + T3 ^= T1; T2 ^= T0; + T1 ^= T2; + } + +inline void ARIA_FE(uint32_t& T0, uint32_t& T1, uint32_t& T2, uint32_t& T3) + { + T0 = X1[get_byte(0,T0)] ^ X2[get_byte(1,T0)] ^ S1[get_byte(2,T0)] ^ S2[get_byte(3,T0)]; + T1 = X1[get_byte(0,T1)] ^ X2[get_byte(1,T1)] ^ S1[get_byte(2,T1)] ^ S2[get_byte(3,T1)]; + T2 = X1[get_byte(0,T2)] ^ X2[get_byte(1,T2)] ^ S1[get_byte(2,T2)] ^ S2[get_byte(3,T2)]; + T3 = X1[get_byte(0,T3)] ^ X2[get_byte(1,T3)] ^ S1[get_byte(2,T3)] ^ S2[get_byte(3,T3)]; + + T1 ^= T2; + T2 ^= T3; T0 ^= T1; + T3 ^= T1; T2 ^= T0; + T1 ^= T2; + + T3 = ((T3 << 8) & 0xFF00FF00) | ((T3 >> 8) & 0x00FF00FF); + T0 = rotr<16>(T0); + T1 = reverse_bytes(T1); + + T1 ^= T2; + T2 ^= T3; T0 ^= T1; + T3 
^= T1; T2 ^= T0; + T1 ^= T2; + } + +/* +* ARIA encryption and decryption +*/ +void transform(const uint8_t in[], uint8_t out[], size_t blocks, + const secure_vector<uint32_t>& KS) + { + /* + * Hit every cache line of S1, S2, X1, X2 + * + * The initializer of Z ensures Z == 0xFFFFFFFF for any cache line + * size that is a power of 2 and <= 512 + */ + const size_t cache_line_size = CPUID::cache_line_size(); + + volatile uint32_t Z = 0x11101010; + for(size_t i = 0; i < 256; i += cache_line_size / sizeof(uint32_t)) + { + Z |= S1[i] | S2[i] | X1[i] | X2[i]; + } + + const size_t ROUNDS = (KS.size() / 4) - 1; + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t t0, t1, t2, t3; + load_be(in + 16*i, t0, t1, t2, t3); + + t0 &= Z; + + for(size_t r = 0; r < ROUNDS; r += 2) + { + t0 ^= KS[4*r]; + t1 ^= KS[4*r+1]; + t2 ^= KS[4*r+2]; + t3 ^= KS[4*r+3]; + ARIA_FO(t0,t1,t2,t3); + + t0 ^= KS[4*r+4]; + t1 ^= KS[4*r+5]; + t2 ^= KS[4*r+6]; + t3 ^= KS[4*r+7]; + + if(r != ROUNDS-2) + ARIA_FE(t0,t1,t2,t3); + } + + out[16*i+ 0] = static_cast<uint8_t>(X1[get_byte(0,t0)] ) ^ get_byte(0, KS[4*ROUNDS]); + out[16*i+ 1] = static_cast<uint8_t>(X2[get_byte(1,t0)]>>8) ^ get_byte(1, KS[4*ROUNDS]); + out[16*i+ 2] = static_cast<uint8_t>(S1[get_byte(2,t0)] ) ^ get_byte(2, KS[4*ROUNDS]); + out[16*i+ 3] = static_cast<uint8_t>(S2[get_byte(3,t0)] ) ^ get_byte(3, KS[4*ROUNDS]); + out[16*i+ 4] = static_cast<uint8_t>(X1[get_byte(0,t1)] ) ^ get_byte(0, KS[4*ROUNDS+1]); + out[16*i+ 5] = static_cast<uint8_t>(X2[get_byte(1,t1)]>>8) ^ get_byte(1, KS[4*ROUNDS+1]); + out[16*i+ 6] = static_cast<uint8_t>(S1[get_byte(2,t1)] ) ^ get_byte(2, KS[4*ROUNDS+1]); + out[16*i+ 7] = static_cast<uint8_t>(S2[get_byte(3,t1)] ) ^ get_byte(3, KS[4*ROUNDS+1]); + out[16*i+ 8] = static_cast<uint8_t>(X1[get_byte(0,t2)] ) ^ get_byte(0, KS[4*ROUNDS+2]); + out[16*i+ 9] = static_cast<uint8_t>(X2[get_byte(1,t2)]>>8) ^ get_byte(1, KS[4*ROUNDS+2]); + out[16*i+10] = static_cast<uint8_t>(S1[get_byte(2,t2)] ) ^ get_byte(2, KS[4*ROUNDS+2]); + 
out[16*i+11] = static_cast<uint8_t>(S2[get_byte(3,t2)] ) ^ get_byte(3, KS[4*ROUNDS+2]); + out[16*i+12] = static_cast<uint8_t>(X1[get_byte(0,t3)] ) ^ get_byte(0, KS[4*ROUNDS+3]); + out[16*i+13] = static_cast<uint8_t>(X2[get_byte(1,t3)]>>8) ^ get_byte(1, KS[4*ROUNDS+3]); + out[16*i+14] = static_cast<uint8_t>(S1[get_byte(2,t3)] ) ^ get_byte(2, KS[4*ROUNDS+3]); + out[16*i+15] = static_cast<uint8_t>(S2[get_byte(3,t3)] ) ^ get_byte(3, KS[4*ROUNDS+3]); + } + } + +// n-bit right shift of Y XORed to X +template<size_t N> +inline void ARIA_ROL128(const uint32_t X[4], const uint32_t Y[4], uint32_t KS[4]) + { + // MSVC is not generating a "rotate immediate". Constify to help it along. + static const size_t Q = 4 - (N / 32); + static const size_t R = N % 32; + static_assert(R > 0 && R < 32, "Rotation in range for type"); + KS[0] = (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R)); + KS[1] = (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R)); + KS[2] = (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R)); + KS[3] = (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R)); + } + +/* +* ARIA Key Schedule +*/ +void key_schedule(secure_vector<uint32_t>& ERK, + secure_vector<uint32_t>& DRK, + const uint8_t key[], size_t length) + { + const uint32_t KRK[3][4] = { + {0x517cc1b7, 0x27220a94, 0xfe13abe8, 0xfa9a6ee0}, + {0x6db14acc, 0x9e21c820, 0xff28b1d5, 0xef5de2b0}, + {0xdb92371d, 0x2126e970, 0x03249775, 0x04e8c90e} + }; + + const size_t CK0 = (length / 8) - 2; + const size_t CK1 = (CK0 + 1) % 3; + const size_t CK2 = (CK1 + 1) % 3; + + uint32_t w0[4]; + uint32_t w1[4]; + uint32_t w2[4]; + uint32_t w3[4]; + + w0[0] = load_be<uint32_t>(key,0); + w0[1] = load_be<uint32_t>(key,1); + w0[2] = load_be<uint32_t>(key,2); + w0[3] = load_be<uint32_t>(key,3); + + w1[0] = w0[0] ^ KRK[CK0][0]; + w1[1] = w0[1] ^ KRK[CK0][1]; + w1[2] = w0[2] ^ KRK[CK0][2]; + w1[3] = w0[3] ^ KRK[CK0][3]; + + ARIA_FO(w1[0], w1[1], w1[2], w1[3]); + + if(length == 24 || length == 32) + { + w1[0] ^= load_be<uint32_t>(key,4); + 
w1[1] ^= load_be<uint32_t>(key,5); + } + if(length == 32) + { + w1[2] ^= load_be<uint32_t>(key,6); + w1[3] ^= load_be<uint32_t>(key,7); + } + + w2[0] = w1[0] ^ KRK[CK1][0]; + w2[1] = w1[1] ^ KRK[CK1][1]; + w2[2] = w1[2] ^ KRK[CK1][2]; + w2[3] = w1[3] ^ KRK[CK1][3]; + + ARIA_FE(w2[0], w2[1], w2[2], w2[3]); + + w2[0] ^= w0[0]; + w2[1] ^= w0[1]; + w2[2] ^= w0[2]; + w2[3] ^= w0[3]; + + w3[0] = w2[0] ^ KRK[CK2][0]; + w3[1] = w2[1] ^ KRK[CK2][1]; + w3[2] = w2[2] ^ KRK[CK2][2]; + w3[3] = w2[3] ^ KRK[CK2][3]; + + ARIA_FO(w3[0], w3[1], w3[2], w3[3]); + + w3[0] ^= w1[0]; + w3[1] ^= w1[1]; + w3[2] ^= w1[2]; + w3[3] ^= w1[3]; + + if(length == 16) + ERK.resize(4*13); + else if(length == 24) + ERK.resize(4*15); + else if(length == 32) + ERK.resize(4*17); + + ARIA_ROL128<19>(w0, w1, &ERK[ 0]); + ARIA_ROL128<19>(w1, w2, &ERK[ 4]); + ARIA_ROL128<19>(w2, w3, &ERK[ 8]); + ARIA_ROL128<19>(w3, w0, &ERK[12]); + ARIA_ROL128<31>(w0, w1, &ERK[16]); + ARIA_ROL128<31>(w1, w2, &ERK[20]); + ARIA_ROL128<31>(w2, w3, &ERK[24]); + ARIA_ROL128<31>(w3, w0, &ERK[28]); + ARIA_ROL128<67>(w0, w1, &ERK[32]); + ARIA_ROL128<67>(w1, w2, &ERK[36]); + ARIA_ROL128<67>(w2, w3, &ERK[40]); + ARIA_ROL128<67>(w3, w0, &ERK[44]); + ARIA_ROL128<97>(w0, w1, &ERK[48]); + + if(length == 24 || length == 32) + { + ARIA_ROL128<97>(w1, w2, &ERK[52]); + ARIA_ROL128<97>(w2, w3, &ERK[56]); + + if(length == 32) + { + ARIA_ROL128< 97>(w3, w0, &ERK[60]); + ARIA_ROL128<109>(w0, w1, &ERK[64]); + } + } + + // Now create the decryption key schedule + DRK.resize(ERK.size()); + + for(size_t i = 0; i != DRK.size(); i += 4) + { + DRK[i ] = ERK[ERK.size()-4-i]; + DRK[i+1] = ERK[ERK.size()-3-i]; + DRK[i+2] = ERK[ERK.size()-2-i]; + DRK[i+3] = ERK[ERK.size()-1-i]; + } + + for(size_t i = 4; i != DRK.size() - 4; i += 4) + { + for(size_t j = 0; j != 4; ++j) + { + DRK[i+j] = rotr<8>(DRK[i+j]) ^ + rotr<16>(DRK[i+j]) ^ + rotr<24>(DRK[i+j]); + } + + DRK[i+1] ^= DRK[i+2]; DRK[i+2] ^= DRK[i+3]; + DRK[i+0] ^= DRK[i+1]; DRK[i+3] ^= DRK[i+1]; + DRK[i+2] 
^= DRK[i+0]; DRK[i+1] ^= DRK[i+2]; + + DRK[i+1] = ((DRK[i+1] << 8) & 0xFF00FF00) | ((DRK[i+1] >> 8) & 0x00FF00FF); + DRK[i+2] = rotr<16>(DRK[i+2]); + DRK[i+3] = reverse_bytes(DRK[i+3]); + + DRK[i+1] ^= DRK[i+2]; DRK[i+2] ^= DRK[i+3]; + DRK[i+0] ^= DRK[i+1]; DRK[i+3] ^= DRK[i+1]; + DRK[i+2] ^= DRK[i+0]; DRK[i+1] ^= DRK[i+2]; + } + } + +} + +} + +void ARIA_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_ERK.size() > 0); + ARIA_F::transform(in, out, blocks, m_ERK); + } + +void ARIA_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_ERK.size() > 0); + ARIA_F::transform(in, out, blocks, m_ERK); + } + +void ARIA_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_ERK.size() > 0); + ARIA_F::transform(in, out, blocks, m_ERK); + } + +void ARIA_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DRK.size() > 0); + ARIA_F::transform(in, out, blocks, m_DRK); + } + +void ARIA_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DRK.size() > 0); + ARIA_F::transform(in, out, blocks, m_DRK); + } + +void ARIA_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DRK.size() > 0); + ARIA_F::transform(in, out, blocks, m_DRK); + } + +void ARIA_128::key_schedule(const uint8_t key[], size_t length) + { + ARIA_F::key_schedule(m_ERK, m_DRK, key, length); + } + +void ARIA_192::key_schedule(const uint8_t key[], size_t length) + { + ARIA_F::key_schedule(m_ERK, m_DRK, key, length); + } + +void ARIA_256::key_schedule(const uint8_t key[], size_t length) + { + ARIA_F::key_schedule(m_ERK, m_DRK, key, length); + } + +void ARIA_128::clear() + { + zap(m_ERK); + zap(m_DRK); + } + +void ARIA_192::clear() + { + zap(m_ERK); + zap(m_DRK); + } + +void ARIA_256::clear() + { + zap(m_ERK); + zap(m_DRK); + } + +} diff --git 
a/comm/third_party/botan/src/lib/block/aria/aria.h b/comm/third_party/botan/src/lib/block/aria/aria.h new file mode 100644 index 0000000000..507226b7d3 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aria/aria.h @@ -0,0 +1,84 @@ +/* +* ARIA +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +* +* This ARIA implementation is based on the 32-bit implementation by Aaram Yun from the +* National Security Research Institute, KOREA. Aaram Yun's implementation is based on +* the 8-bit implementation by Jin Hong. The source files are available in ARIA.zip from +* the Korea Internet & Security Agency website. +* <A HREF="https://tools.ietf.org/html/rfc5794">RFC 5794, A Description of the ARIA Encryption Algorithm</A>, +* <A HREF="http://seed.kisa.or.kr/iwt/ko/bbs/EgovReferenceList.do?bbsId=BBSMSTR_000000000002">Korea +* Internet & Security Agency homepage</A> +*/ + +#ifndef BOTAN_ARIA_H_ +#define BOTAN_ARIA_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(aria.h) + +namespace Botan { + +/** +* ARIA-128 +*/ +class BOTAN_PUBLIC_API(2,3) ARIA_128 final : public Block_Cipher_Fixed_Params<16, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "ARIA-128"; } + BlockCipher* clone() const override { return new ARIA_128; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + // Encryption and Decryption round keys. 
+ secure_vector<uint32_t> m_ERK, m_DRK; + }; + +/** +* ARIA-192 +*/ +class BOTAN_PUBLIC_API(2,3) ARIA_192 final : public Block_Cipher_Fixed_Params<16, 24> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "ARIA-192"; } + BlockCipher* clone() const override { return new ARIA_192; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + // Encryption and Decryption round keys. + secure_vector<uint32_t> m_ERK, m_DRK; + }; + +/** +* ARIA-256 +*/ +class BOTAN_PUBLIC_API(2,3) ARIA_256 final : public Block_Cipher_Fixed_Params<16, 32> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "ARIA-256"; } + BlockCipher* clone() const override { return new ARIA_256; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + // Encryption and Decryption round keys. 
+ secure_vector<uint32_t> m_ERK, m_DRK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/aria/info.txt b/comm/third_party/botan/src/lib/block/aria/info.txt new file mode 100644 index 0000000000..78c16726c8 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/aria/info.txt @@ -0,0 +1,7 @@ +<defines> +ARIA -> 20170415 +</defines> + +<header:public> +aria.h +</header:public> diff --git a/comm/third_party/botan/src/lib/block/block_cipher.cpp b/comm/third_party/botan/src/lib/block/block_cipher.cpp new file mode 100644 index 0000000000..fb0564646e --- /dev/null +++ b/comm/third_party/botan/src/lib/block/block_cipher.cpp @@ -0,0 +1,363 @@ +/* +* Block Ciphers +* (C) 2015 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/block_cipher.h> +#include <botan/scan_name.h> +#include <botan/exceptn.h> + +#if defined(BOTAN_HAS_AES) + #include <botan/aes.h> +#endif + +#if defined(BOTAN_HAS_ARIA) + #include <botan/aria.h> +#endif + +#if defined(BOTAN_HAS_BLOWFISH) + #include <botan/blowfish.h> +#endif + +#if defined(BOTAN_HAS_CAMELLIA) + #include <botan/camellia.h> +#endif + +#if defined(BOTAN_HAS_CAST_128) + #include <botan/cast128.h> +#endif + +#if defined(BOTAN_HAS_CAST_256) + #include <botan/cast256.h> +#endif + +#if defined(BOTAN_HAS_CASCADE) + #include <botan/cascade.h> +#endif + +#if defined(BOTAN_HAS_DES) + #include <botan/des.h> + #include <botan/desx.h> +#endif + +#if defined(BOTAN_HAS_GOST_28147_89) + #include <botan/gost_28147.h> +#endif + +#if defined(BOTAN_HAS_IDEA) + #include <botan/idea.h> +#endif + +#if defined(BOTAN_HAS_KASUMI) + #include <botan/kasumi.h> +#endif + +#if defined(BOTAN_HAS_LION) + #include <botan/lion.h> +#endif + +#if defined(BOTAN_HAS_MISTY1) + #include <botan/misty1.h> +#endif + +#if defined(BOTAN_HAS_NOEKEON) + #include <botan/noekeon.h> +#endif + +#if defined(BOTAN_HAS_SEED) + #include <botan/seed.h> +#endif + +#if defined(BOTAN_HAS_SERPENT) + #include 
<botan/serpent.h> +#endif + +#if defined(BOTAN_HAS_SHACAL2) + #include <botan/shacal2.h> +#endif + +#if defined(BOTAN_HAS_SM4) + #include <botan/sm4.h> +#endif + +#if defined(BOTAN_HAS_TWOFISH) + #include <botan/twofish.h> +#endif + +#if defined(BOTAN_HAS_THREEFISH_512) + #include <botan/threefish_512.h> +#endif + +#if defined(BOTAN_HAS_XTEA) + #include <botan/xtea.h> +#endif + +#if defined(BOTAN_HAS_OPENSSL) + #include <botan/internal/openssl.h> +#endif + +#if defined(BOTAN_HAS_COMMONCRYPTO) + #include <botan/internal/commoncrypto.h> +#endif + +namespace Botan { + +std::unique_ptr<BlockCipher> +BlockCipher::create(const std::string& algo, + const std::string& provider) + { +#if defined(BOTAN_HAS_COMMONCRYPTO) + if(provider.empty() || provider == "commoncrypto") + { + if(auto bc = make_commoncrypto_block_cipher(algo)) + return bc; + + if(!provider.empty()) + return nullptr; + } +#endif + +#if defined(BOTAN_HAS_OPENSSL) + if(provider.empty() || provider == "openssl") + { + if(auto bc = make_openssl_block_cipher(algo)) + return bc; + + if(!provider.empty()) + return nullptr; + } +#endif + + // TODO: CryptoAPI + // TODO: /dev/crypto + + // Only base providers from here on out + if(provider.empty() == false && provider != "base") + return nullptr; + +#if defined(BOTAN_HAS_AES) + if(algo == "AES-128") + { + return std::unique_ptr<BlockCipher>(new AES_128); + } + + if(algo == "AES-192") + { + return std::unique_ptr<BlockCipher>(new AES_192); + } + + if(algo == "AES-256") + { + return std::unique_ptr<BlockCipher>(new AES_256); + } +#endif + +#if defined(BOTAN_HAS_ARIA) + if(algo == "ARIA-128") + { + return std::unique_ptr<BlockCipher>(new ARIA_128); + } + + if(algo == "ARIA-192") + { + return std::unique_ptr<BlockCipher>(new ARIA_192); + } + + if(algo == "ARIA-256") + { + return std::unique_ptr<BlockCipher>(new ARIA_256); + } +#endif + +#if defined(BOTAN_HAS_SERPENT) + if(algo == "Serpent") + { + return std::unique_ptr<BlockCipher>(new Serpent); + } +#endif + +#if 
defined(BOTAN_HAS_SHACAL2) + if(algo == "SHACAL2") + { + return std::unique_ptr<BlockCipher>(new SHACAL2); + } +#endif + +#if defined(BOTAN_HAS_TWOFISH) + if(algo == "Twofish") + { + return std::unique_ptr<BlockCipher>(new Twofish); + } +#endif + +#if defined(BOTAN_HAS_THREEFISH_512) + if(algo == "Threefish-512") + { + return std::unique_ptr<BlockCipher>(new Threefish_512); + } +#endif + +#if defined(BOTAN_HAS_BLOWFISH) + if(algo == "Blowfish") + { + return std::unique_ptr<BlockCipher>(new Blowfish); + } +#endif + +#if defined(BOTAN_HAS_CAMELLIA) + if(algo == "Camellia-128") + { + return std::unique_ptr<BlockCipher>(new Camellia_128); + } + + if(algo == "Camellia-192") + { + return std::unique_ptr<BlockCipher>(new Camellia_192); + } + + if(algo == "Camellia-256") + { + return std::unique_ptr<BlockCipher>(new Camellia_256); + } +#endif + +#if defined(BOTAN_HAS_DES) + if(algo == "DES") + { + return std::unique_ptr<BlockCipher>(new DES); + } + + if(algo == "DESX") + { + return std::unique_ptr<BlockCipher>(new DESX); + } + + if(algo == "TripleDES" || algo == "3DES" || algo == "DES-EDE") + { + return std::unique_ptr<BlockCipher>(new TripleDES); + } +#endif + +#if defined(BOTAN_HAS_NOEKEON) + if(algo == "Noekeon") + { + return std::unique_ptr<BlockCipher>(new Noekeon); + } +#endif + +#if defined(BOTAN_HAS_CAST_128) + if(algo == "CAST-128" || algo == "CAST5") + { + return std::unique_ptr<BlockCipher>(new CAST_128); + } +#endif + +#if defined(BOTAN_HAS_CAST_256) + if(algo == "CAST-256") + { + return std::unique_ptr<BlockCipher>(new CAST_256); + } +#endif + +#if defined(BOTAN_HAS_IDEA) + if(algo == "IDEA") + { + return std::unique_ptr<BlockCipher>(new IDEA); + } +#endif + +#if defined(BOTAN_HAS_KASUMI) + if(algo == "KASUMI") + { + return std::unique_ptr<BlockCipher>(new KASUMI); + } +#endif + +#if defined(BOTAN_HAS_MISTY1) + if(algo == "MISTY1") + { + return std::unique_ptr<BlockCipher>(new MISTY1); + } +#endif + +#if defined(BOTAN_HAS_SEED) + if(algo == "SEED") + { + 
return std::unique_ptr<BlockCipher>(new SEED); + } +#endif + +#if defined(BOTAN_HAS_SM4) + if(algo == "SM4") + { + return std::unique_ptr<BlockCipher>(new SM4); + } +#endif + +#if defined(BOTAN_HAS_XTEA) + if(algo == "XTEA") + { + return std::unique_ptr<BlockCipher>(new XTEA); + } +#endif + + const SCAN_Name req(algo); + +#if defined(BOTAN_HAS_GOST_28147_89) + if(req.algo_name() == "GOST-28147-89") + { + return std::unique_ptr<BlockCipher>(new GOST_28147_89(req.arg(0, "R3411_94_TestParam"))); + } +#endif + +#if defined(BOTAN_HAS_CASCADE) + if(req.algo_name() == "Cascade" && req.arg_count() == 2) + { + std::unique_ptr<BlockCipher> c1(BlockCipher::create(req.arg(0))); + std::unique_ptr<BlockCipher> c2(BlockCipher::create(req.arg(1))); + + if(c1 && c2) + return std::unique_ptr<BlockCipher>(new Cascade_Cipher(c1.release(), c2.release())); + } +#endif + +#if defined(BOTAN_HAS_LION) + if(req.algo_name() == "Lion" && req.arg_count_between(2, 3)) + { + std::unique_ptr<HashFunction> hash(HashFunction::create(req.arg(0))); + std::unique_ptr<StreamCipher> stream(StreamCipher::create(req.arg(1))); + + if(hash && stream) + { + const size_t block_size = req.arg_as_integer(2, 1024); + return std::unique_ptr<BlockCipher>(new Lion(hash.release(), stream.release(), block_size)); + } + } +#endif + + BOTAN_UNUSED(req); + BOTAN_UNUSED(provider); + + return nullptr; + } + +//static +std::unique_ptr<BlockCipher> +BlockCipher::create_or_throw(const std::string& algo, + const std::string& provider) + { + if(auto bc = BlockCipher::create(algo, provider)) + { + return bc; + } + throw Lookup_Error("Block cipher", algo, provider); + } + +std::vector<std::string> BlockCipher::providers(const std::string& algo) + { + return probe_providers_of<BlockCipher>(algo, { "base", "openssl", "commoncrypto" }); + } + +} diff --git a/comm/third_party/botan/src/lib/block/block_cipher.h b/comm/third_party/botan/src/lib/block/block_cipher.h new file mode 100644 index 0000000000..68cdd1afe0 --- /dev/null +++ 
b/comm/third_party/botan/src/lib/block/block_cipher.h @@ -0,0 +1,254 @@ +/* +* Block Cipher Base Class +* (C) 1999-2009 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_BLOCK_CIPHER_H_ +#define BOTAN_BLOCK_CIPHER_H_ + +#include <botan/sym_algo.h> +#include <string> +#include <memory> +#include <vector> + +namespace Botan { + +/** +* This class represents a block cipher object. +*/ +class BOTAN_PUBLIC_API(2,0) BlockCipher : public SymmetricAlgorithm + { + public: + + /** + * Create an instance based on a name + * If provider is empty then best available is chosen. + * @param algo_spec algorithm name + * @param provider provider implementation to choose + * @return a null pointer if the algo/provider combination cannot be found + */ + static std::unique_ptr<BlockCipher> + create(const std::string& algo_spec, + const std::string& provider = ""); + + /** + * Create an instance based on a name, or throw if the + * algo/provider combination cannot be found. If provider is + * empty then best available is chosen. + */ + static std::unique_ptr<BlockCipher> + create_or_throw(const std::string& algo_spec, + const std::string& provider = ""); + + /** + * @return list of available providers for this algorithm, empty if not available + * @param algo_spec algorithm name + */ + static std::vector<std::string> providers(const std::string& algo_spec); + + /** + * @return block size of this algorithm + */ + virtual size_t block_size() const = 0; + + /** + * @return native parallelism of this cipher in blocks + */ + virtual size_t parallelism() const { return 1; } + + /** + * @return preferred parallelism of this cipher in bytes + */ + size_t parallel_bytes() const + { + return parallelism() * block_size() * BOTAN_BLOCK_CIPHER_PAR_MULT; + } + + /** + * @return provider information about this implementation. Default is "base", + * might also return "sse2", "avx2", "openssl", or some other arbitrary string. 
+ */ + virtual std::string provider() const { return "base"; } + + /** + * Encrypt a block. + * @param in The plaintext block to be encrypted as a byte array. + * Must be of length block_size(). + * @param out The byte array designated to hold the encrypted block. + * Must be of length block_size(). + */ + void encrypt(const uint8_t in[], uint8_t out[]) const + { encrypt_n(in, out, 1); } + + /** + * Decrypt a block. + * @param in The ciphertext block to be decrypted as a byte array. + * Must be of length block_size(). + * @param out The byte array designated to hold the decrypted block. + * Must be of length block_size(). + */ + void decrypt(const uint8_t in[], uint8_t out[]) const + { decrypt_n(in, out, 1); } + + /** + * Encrypt a block. + * @param block the plaintext block to be encrypted + * Must be of length block_size(). Will hold the result when the function + * has finished. + */ + void encrypt(uint8_t block[]) const { encrypt_n(block, block, 1); } + + /** + * Decrypt a block. + * @param block the ciphertext block to be decrypted + * Must be of length block_size(). Will hold the result when the function + * has finished. 
+ */ + void decrypt(uint8_t block[]) const { decrypt_n(block, block, 1); } + + /** + * Encrypt one or more blocks + * @param block the input/output buffer (multiple of block_size()) + */ + template<typename Alloc> + void encrypt(std::vector<uint8_t, Alloc>& block) const + { + return encrypt_n(block.data(), block.data(), block.size() / block_size()); + } + + /** + * Decrypt one or more blocks + * @param block the input/output buffer (multiple of block_size()) + */ + template<typename Alloc> + void decrypt(std::vector<uint8_t, Alloc>& block) const + { + return decrypt_n(block.data(), block.data(), block.size() / block_size()); + } + + /** + * Encrypt one or more blocks + * @param in the input buffer (multiple of block_size()) + * @param out the output buffer (same size as in) + */ + template<typename Alloc, typename Alloc2> + void encrypt(const std::vector<uint8_t, Alloc>& in, + std::vector<uint8_t, Alloc2>& out) const + { + return encrypt_n(in.data(), out.data(), in.size() / block_size()); + } + + /** + * Decrypt one or more blocks + * @param in the input buffer (multiple of block_size()) + * @param out the output buffer (same size as in) + */ + template<typename Alloc, typename Alloc2> + void decrypt(const std::vector<uint8_t, Alloc>& in, + std::vector<uint8_t, Alloc2>& out) const + { + return decrypt_n(in.data(), out.data(), in.size() / block_size()); + } + + /** + * Encrypt one or more blocks + * @param in the input buffer (multiple of block_size()) + * @param out the output buffer (same size as in) + * @param blocks the number of blocks to process + */ + virtual void encrypt_n(const uint8_t in[], uint8_t out[], + size_t blocks) const = 0; + + /** + * Decrypt one or more blocks + * @param in the input buffer (multiple of block_size()) + * @param out the output buffer (same size as in) + * @param blocks the number of blocks to process + */ + virtual void decrypt_n(const uint8_t in[], uint8_t out[], + size_t blocks) const = 0; + + virtual void encrypt_n_xex(uint8_t 
data[], + const uint8_t mask[], + size_t blocks) const + { + const size_t BS = block_size(); + xor_buf(data, mask, blocks * BS); + encrypt_n(data, data, blocks); + xor_buf(data, mask, blocks * BS); + } + + virtual void decrypt_n_xex(uint8_t data[], + const uint8_t mask[], + size_t blocks) const + { + const size_t BS = block_size(); + xor_buf(data, mask, blocks * BS); + decrypt_n(data, data, blocks); + xor_buf(data, mask, blocks * BS); + } + + /** + * @return new object representing the same algorithm as *this + */ + virtual BlockCipher* clone() const = 0; + + virtual ~BlockCipher() = default; + }; + +/** +* Tweakable block ciphers allow setting a tweak which is a non-keyed +* value which affects the encryption/decryption operation. +*/ +class BOTAN_PUBLIC_API(2,8) Tweakable_Block_Cipher : public BlockCipher + { + public: + /** + * Set the tweak value. This must be called after setting a key. The value + * persists until either set_tweak, set_key, or clear is called. + * Different algorithms support different tweak length(s). If called with + * an unsupported length, Invalid_Argument will be thrown. 
+ */ + virtual void set_tweak(const uint8_t tweak[], size_t len) = 0; + }; + +/** +* Represents a block cipher with a single fixed block size +*/ +template<size_t BS, size_t KMIN, size_t KMAX = 0, size_t KMOD = 1, typename BaseClass = BlockCipher> +class Block_Cipher_Fixed_Params : public BaseClass + { + public: + enum { BLOCK_SIZE = BS }; + size_t block_size() const final override { return BS; } + + // override to take advantage of compile time constant block size + void encrypt_n_xex(uint8_t data[], + const uint8_t mask[], + size_t blocks) const final override + { + xor_buf(data, mask, blocks * BS); + this->encrypt_n(data, data, blocks); + xor_buf(data, mask, blocks * BS); + } + + void decrypt_n_xex(uint8_t data[], + const uint8_t mask[], + size_t blocks) const final override + { + xor_buf(data, mask, blocks * BS); + this->decrypt_n(data, data, blocks); + xor_buf(data, mask, blocks * BS); + } + + Key_Length_Specification key_spec() const final override + { + return Key_Length_Specification(KMIN, KMAX, KMOD); + } + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/blowfish/blowfish.cpp b/comm/third_party/botan/src/lib/block/blowfish/blowfish.cpp new file mode 100644 index 0000000000..ecb9f82e36 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/blowfish/blowfish.cpp @@ -0,0 +1,456 @@ +/* +* Blowfish +* (C) 1999-2011,2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/blowfish.h> +#include <botan/loadstor.h> + +namespace Botan { + +namespace { + +const uint32_t P_INIT[18] = { + 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344, 0xA4093822, 0x299F31D0, + 0x082EFA98, 0xEC4E6C89, 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917, 0x9216D5D9, 0x8979FB1B }; + +const uint32_t S_INIT[1024] = { + 0xD1310BA6, 0x98DFB5AC, 0x2FFD72DB, 0xD01ADFB7, 0xB8E1AFED, 0x6A267E96, + 0xBA7C9045, 0xF12C7F99, 0x24A19947, 0xB3916CF7, 0x0801F2E2, 0x858EFC16, + 
0x636920D8, 0x71574E69, 0xA458FEA3, 0xF4933D7E, 0x0D95748F, 0x728EB658, + 0x718BCD58, 0x82154AEE, 0x7B54A41D, 0xC25A59B5, 0x9C30D539, 0x2AF26013, + 0xC5D1B023, 0x286085F0, 0xCA417918, 0xB8DB38EF, 0x8E79DCB0, 0x603A180E, + 0x6C9E0E8B, 0xB01E8A3E, 0xD71577C1, 0xBD314B27, 0x78AF2FDA, 0x55605C60, + 0xE65525F3, 0xAA55AB94, 0x57489862, 0x63E81440, 0x55CA396A, 0x2AAB10B6, + 0xB4CC5C34, 0x1141E8CE, 0xA15486AF, 0x7C72E993, 0xB3EE1411, 0x636FBC2A, + 0x2BA9C55D, 0x741831F6, 0xCE5C3E16, 0x9B87931E, 0xAFD6BA33, 0x6C24CF5C, + 0x7A325381, 0x28958677, 0x3B8F4898, 0x6B4BB9AF, 0xC4BFE81B, 0x66282193, + 0x61D809CC, 0xFB21A991, 0x487CAC60, 0x5DEC8032, 0xEF845D5D, 0xE98575B1, + 0xDC262302, 0xEB651B88, 0x23893E81, 0xD396ACC5, 0x0F6D6FF3, 0x83F44239, + 0x2E0B4482, 0xA4842004, 0x69C8F04A, 0x9E1F9B5E, 0x21C66842, 0xF6E96C9A, + 0x670C9C61, 0xABD388F0, 0x6A51A0D2, 0xD8542F68, 0x960FA728, 0xAB5133A3, + 0x6EEF0B6C, 0x137A3BE4, 0xBA3BF050, 0x7EFB2A98, 0xA1F1651D, 0x39AF0176, + 0x66CA593E, 0x82430E88, 0x8CEE8619, 0x456F9FB4, 0x7D84A5C3, 0x3B8B5EBE, + 0xE06F75D8, 0x85C12073, 0x401A449F, 0x56C16AA6, 0x4ED3AA62, 0x363F7706, + 0x1BFEDF72, 0x429B023D, 0x37D0D724, 0xD00A1248, 0xDB0FEAD3, 0x49F1C09B, + 0x075372C9, 0x80991B7B, 0x25D479D8, 0xF6E8DEF7, 0xE3FE501A, 0xB6794C3B, + 0x976CE0BD, 0x04C006BA, 0xC1A94FB6, 0x409F60C4, 0x5E5C9EC2, 0x196A2463, + 0x68FB6FAF, 0x3E6C53B5, 0x1339B2EB, 0x3B52EC6F, 0x6DFC511F, 0x9B30952C, + 0xCC814544, 0xAF5EBD09, 0xBEE3D004, 0xDE334AFD, 0x660F2807, 0x192E4BB3, + 0xC0CBA857, 0x45C8740F, 0xD20B5F39, 0xB9D3FBDB, 0x5579C0BD, 0x1A60320A, + 0xD6A100C6, 0x402C7279, 0x679F25FE, 0xFB1FA3CC, 0x8EA5E9F8, 0xDB3222F8, + 0x3C7516DF, 0xFD616B15, 0x2F501EC8, 0xAD0552AB, 0x323DB5FA, 0xFD238760, + 0x53317B48, 0x3E00DF82, 0x9E5C57BB, 0xCA6F8CA0, 0x1A87562E, 0xDF1769DB, + 0xD542A8F6, 0x287EFFC3, 0xAC6732C6, 0x8C4F5573, 0x695B27B0, 0xBBCA58C8, + 0xE1FFA35D, 0xB8F011A0, 0x10FA3D98, 0xFD2183B8, 0x4AFCB56C, 0x2DD1D35B, + 0x9A53E479, 0xB6F84565, 0xD28E49BC, 0x4BFB9790, 0xE1DDF2DA, 0xA4CB7E33, + 
0x62FB1341, 0xCEE4C6E8, 0xEF20CADA, 0x36774C01, 0xD07E9EFE, 0x2BF11FB4, + 0x95DBDA4D, 0xAE909198, 0xEAAD8E71, 0x6B93D5A0, 0xD08ED1D0, 0xAFC725E0, + 0x8E3C5B2F, 0x8E7594B7, 0x8FF6E2FB, 0xF2122B64, 0x8888B812, 0x900DF01C, + 0x4FAD5EA0, 0x688FC31C, 0xD1CFF191, 0xB3A8C1AD, 0x2F2F2218, 0xBE0E1777, + 0xEA752DFE, 0x8B021FA1, 0xE5A0CC0F, 0xB56F74E8, 0x18ACF3D6, 0xCE89E299, + 0xB4A84FE0, 0xFD13E0B7, 0x7CC43B81, 0xD2ADA8D9, 0x165FA266, 0x80957705, + 0x93CC7314, 0x211A1477, 0xE6AD2065, 0x77B5FA86, 0xC75442F5, 0xFB9D35CF, + 0xEBCDAF0C, 0x7B3E89A0, 0xD6411BD3, 0xAE1E7E49, 0x00250E2D, 0x2071B35E, + 0x226800BB, 0x57B8E0AF, 0x2464369B, 0xF009B91E, 0x5563911D, 0x59DFA6AA, + 0x78C14389, 0xD95A537F, 0x207D5BA2, 0x02E5B9C5, 0x83260376, 0x6295CFA9, + 0x11C81968, 0x4E734A41, 0xB3472DCA, 0x7B14A94A, 0x1B510052, 0x9A532915, + 0xD60F573F, 0xBC9BC6E4, 0x2B60A476, 0x81E67400, 0x08BA6FB5, 0x571BE91F, + 0xF296EC6B, 0x2A0DD915, 0xB6636521, 0xE7B9F9B6, 0xFF34052E, 0xC5855664, + 0x53B02D5D, 0xA99F8FA1, 0x08BA4799, 0x6E85076A, 0x4B7A70E9, 0xB5B32944, + 0xDB75092E, 0xC4192623, 0xAD6EA6B0, 0x49A7DF7D, 0x9CEE60B8, 0x8FEDB266, + 0xECAA8C71, 0x699A17FF, 0x5664526C, 0xC2B19EE1, 0x193602A5, 0x75094C29, + 0xA0591340, 0xE4183A3E, 0x3F54989A, 0x5B429D65, 0x6B8FE4D6, 0x99F73FD6, + 0xA1D29C07, 0xEFE830F5, 0x4D2D38E6, 0xF0255DC1, 0x4CDD2086, 0x8470EB26, + 0x6382E9C6, 0x021ECC5E, 0x09686B3F, 0x3EBAEFC9, 0x3C971814, 0x6B6A70A1, + 0x687F3584, 0x52A0E286, 0xB79C5305, 0xAA500737, 0x3E07841C, 0x7FDEAE5C, + 0x8E7D44EC, 0x5716F2B8, 0xB03ADA37, 0xF0500C0D, 0xF01C1F04, 0x0200B3FF, + 0xAE0CF51A, 0x3CB574B2, 0x25837A58, 0xDC0921BD, 0xD19113F9, 0x7CA92FF6, + 0x94324773, 0x22F54701, 0x3AE5E581, 0x37C2DADC, 0xC8B57634, 0x9AF3DDA7, + 0xA9446146, 0x0FD0030E, 0xECC8C73E, 0xA4751E41, 0xE238CD99, 0x3BEA0E2F, + 0x3280BBA1, 0x183EB331, 0x4E548B38, 0x4F6DB908, 0x6F420D03, 0xF60A04BF, + 0x2CB81290, 0x24977C79, 0x5679B072, 0xBCAF89AF, 0xDE9A771F, 0xD9930810, + 0xB38BAE12, 0xDCCF3F2E, 0x5512721F, 0x2E6B7124, 0x501ADDE6, 0x9F84CD87, + 
0x7A584718, 0x7408DA17, 0xBC9F9ABC, 0xE94B7D8C, 0xEC7AEC3A, 0xDB851DFA, + 0x63094366, 0xC464C3D2, 0xEF1C1847, 0x3215D908, 0xDD433B37, 0x24C2BA16, + 0x12A14D43, 0x2A65C451, 0x50940002, 0x133AE4DD, 0x71DFF89E, 0x10314E55, + 0x81AC77D6, 0x5F11199B, 0x043556F1, 0xD7A3C76B, 0x3C11183B, 0x5924A509, + 0xF28FE6ED, 0x97F1FBFA, 0x9EBABF2C, 0x1E153C6E, 0x86E34570, 0xEAE96FB1, + 0x860E5E0A, 0x5A3E2AB3, 0x771FE71C, 0x4E3D06FA, 0x2965DCB9, 0x99E71D0F, + 0x803E89D6, 0x5266C825, 0x2E4CC978, 0x9C10B36A, 0xC6150EBA, 0x94E2EA78, + 0xA5FC3C53, 0x1E0A2DF4, 0xF2F74EA7, 0x361D2B3D, 0x1939260F, 0x19C27960, + 0x5223A708, 0xF71312B6, 0xEBADFE6E, 0xEAC31F66, 0xE3BC4595, 0xA67BC883, + 0xB17F37D1, 0x018CFF28, 0xC332DDEF, 0xBE6C5AA5, 0x65582185, 0x68AB9802, + 0xEECEA50F, 0xDB2F953B, 0x2AEF7DAD, 0x5B6E2F84, 0x1521B628, 0x29076170, + 0xECDD4775, 0x619F1510, 0x13CCA830, 0xEB61BD96, 0x0334FE1E, 0xAA0363CF, + 0xB5735C90, 0x4C70A239, 0xD59E9E0B, 0xCBAADE14, 0xEECC86BC, 0x60622CA7, + 0x9CAB5CAB, 0xB2F3846E, 0x648B1EAF, 0x19BDF0CA, 0xA02369B9, 0x655ABB50, + 0x40685A32, 0x3C2AB4B3, 0x319EE9D5, 0xC021B8F7, 0x9B540B19, 0x875FA099, + 0x95F7997E, 0x623D7DA8, 0xF837889A, 0x97E32D77, 0x11ED935F, 0x16681281, + 0x0E358829, 0xC7E61FD6, 0x96DEDFA1, 0x7858BA99, 0x57F584A5, 0x1B227263, + 0x9B83C3FF, 0x1AC24696, 0xCDB30AEB, 0x532E3054, 0x8FD948E4, 0x6DBC3128, + 0x58EBF2EF, 0x34C6FFEA, 0xFE28ED61, 0xEE7C3C73, 0x5D4A14D9, 0xE864B7E3, + 0x42105D14, 0x203E13E0, 0x45EEE2B6, 0xA3AAABEA, 0xDB6C4F15, 0xFACB4FD0, + 0xC742F442, 0xEF6ABBB5, 0x654F3B1D, 0x41CD2105, 0xD81E799E, 0x86854DC7, + 0xE44B476A, 0x3D816250, 0xCF62A1F2, 0x5B8D2646, 0xFC8883A0, 0xC1C7B6A3, + 0x7F1524C3, 0x69CB7492, 0x47848A0B, 0x5692B285, 0x095BBF00, 0xAD19489D, + 0x1462B174, 0x23820E00, 0x58428D2A, 0x0C55F5EA, 0x1DADF43E, 0x233F7061, + 0x3372F092, 0x8D937E41, 0xD65FECF1, 0x6C223BDB, 0x7CDE3759, 0xCBEE7460, + 0x4085F2A7, 0xCE77326E, 0xA6078084, 0x19F8509E, 0xE8EFD855, 0x61D99735, + 0xA969A7AA, 0xC50C06C2, 0x5A04ABFC, 0x800BCADC, 0x9E447A2E, 0xC3453484, + 
0xFDD56705, 0x0E1E9EC9, 0xDB73DBD3, 0x105588CD, 0x675FDA79, 0xE3674340, + 0xC5C43465, 0x713E38D8, 0x3D28F89E, 0xF16DFF20, 0x153E21E7, 0x8FB03D4A, + 0xE6E39F2B, 0xDB83ADF7, 0xE93D5A68, 0x948140F7, 0xF64C261C, 0x94692934, + 0x411520F7, 0x7602D4F7, 0xBCF46B2E, 0xD4A20068, 0xD4082471, 0x3320F46A, + 0x43B7D4B7, 0x500061AF, 0x1E39F62E, 0x97244546, 0x14214F74, 0xBF8B8840, + 0x4D95FC1D, 0x96B591AF, 0x70F4DDD3, 0x66A02F45, 0xBFBC09EC, 0x03BD9785, + 0x7FAC6DD0, 0x31CB8504, 0x96EB27B3, 0x55FD3941, 0xDA2547E6, 0xABCA0A9A, + 0x28507825, 0x530429F4, 0x0A2C86DA, 0xE9B66DFB, 0x68DC1462, 0xD7486900, + 0x680EC0A4, 0x27A18DEE, 0x4F3FFEA2, 0xE887AD8C, 0xB58CE006, 0x7AF4D6B6, + 0xAACE1E7C, 0xD3375FEC, 0xCE78A399, 0x406B2A42, 0x20FE9E35, 0xD9F385B9, + 0xEE39D7AB, 0x3B124E8B, 0x1DC9FAF7, 0x4B6D1856, 0x26A36631, 0xEAE397B2, + 0x3A6EFA74, 0xDD5B4332, 0x6841E7F7, 0xCA7820FB, 0xFB0AF54E, 0xD8FEB397, + 0x454056AC, 0xBA489527, 0x55533A3A, 0x20838D87, 0xFE6BA9B7, 0xD096954B, + 0x55A867BC, 0xA1159A58, 0xCCA92963, 0x99E1DB33, 0xA62A4A56, 0x3F3125F9, + 0x5EF47E1C, 0x9029317C, 0xFDF8E802, 0x04272F70, 0x80BB155C, 0x05282CE3, + 0x95C11548, 0xE4C66D22, 0x48C1133F, 0xC70F86DC, 0x07F9C9EE, 0x41041F0F, + 0x404779A4, 0x5D886E17, 0x325F51EB, 0xD59BC0D1, 0xF2BCC18F, 0x41113564, + 0x257B7834, 0x602A9C60, 0xDFF8E8A3, 0x1F636C1B, 0x0E12B4C2, 0x02E1329E, + 0xAF664FD1, 0xCAD18115, 0x6B2395E0, 0x333E92E1, 0x3B240B62, 0xEEBEB922, + 0x85B2A20E, 0xE6BA0D99, 0xDE720C8C, 0x2DA2F728, 0xD0127845, 0x95B794FD, + 0x647D0862, 0xE7CCF5F0, 0x5449A36F, 0x877D48FA, 0xC39DFD27, 0xF33E8D1E, + 0x0A476341, 0x992EFF74, 0x3A6F6EAB, 0xF4F8FD37, 0xA812DC60, 0xA1EBDDF8, + 0x991BE14C, 0xDB6E6B0D, 0xC67B5510, 0x6D672C37, 0x2765D43B, 0xDCD0E804, + 0xF1290DC7, 0xCC00FFA3, 0xB5390F92, 0x690FED0B, 0x667B9FFB, 0xCEDB7D9C, + 0xA091CF0B, 0xD9155EA3, 0xBB132F88, 0x515BAD24, 0x7B9479BF, 0x763BD6EB, + 0x37392EB3, 0xCC115979, 0x8026E297, 0xF42E312D, 0x6842ADA7, 0xC66A2B3B, + 0x12754CCC, 0x782EF11C, 0x6A124237, 0xB79251E7, 0x06A1BBE6, 0x4BFB6350, + 
0x1A6B1018, 0x11CAEDFA, 0x3D25BDD8, 0xE2E1C3C9, 0x44421659, 0x0A121386, + 0xD90CEC6E, 0xD5ABEA2A, 0x64AF674E, 0xDA86A85F, 0xBEBFE988, 0x64E4C3FE, + 0x9DBC8057, 0xF0F7C086, 0x60787BF8, 0x6003604D, 0xD1FD8346, 0xF6381FB0, + 0x7745AE04, 0xD736FCCC, 0x83426B33, 0xF01EAB71, 0xB0804187, 0x3C005E5F, + 0x77A057BE, 0xBDE8AE24, 0x55464299, 0xBF582E61, 0x4E58F48F, 0xF2DDFDA2, + 0xF474EF38, 0x8789BDC2, 0x5366F9C3, 0xC8B38E74, 0xB475F255, 0x46FCD9B9, + 0x7AEB2661, 0x8B1DDF84, 0x846A0E79, 0x915F95E2, 0x466E598E, 0x20B45770, + 0x8CD55591, 0xC902DE4C, 0xB90BACE1, 0xBB8205D0, 0x11A86248, 0x7574A99E, + 0xB77F19B6, 0xE0A9DC09, 0x662D09A1, 0xC4324633, 0xE85A1F02, 0x09F0BE8C, + 0x4A99A025, 0x1D6EFE10, 0x1AB93D1D, 0x0BA5A4DF, 0xA186F20F, 0x2868F169, + 0xDCB7DA83, 0x573906FE, 0xA1E2CE9B, 0x4FCD7F52, 0x50115E01, 0xA70683FA, + 0xA002B5C4, 0x0DE6D027, 0x9AF88C27, 0x773F8641, 0xC3604C06, 0x61A806B5, + 0xF0177A28, 0xC0F586E0, 0x006058AA, 0x30DC7D62, 0x11E69ED7, 0x2338EA63, + 0x53C2DD94, 0xC2C21634, 0xBBCBEE56, 0x90BCB6DE, 0xEBFC7DA1, 0xCE591D76, + 0x6F05E409, 0x4B7C0188, 0x39720A3D, 0x7C927C24, 0x86E3725F, 0x724D9DB9, + 0x1AC15BB4, 0xD39EB8FC, 0xED545578, 0x08FCA5B5, 0xD83D7CD3, 0x4DAD0FC4, + 0x1E50EF5E, 0xB161E6F8, 0xA28514D9, 0x6C51133C, 0x6FD5C7E7, 0x56E14EC4, + 0x362ABFCE, 0xDDC6C837, 0xD79A3234, 0x92638212, 0x670EFA8E, 0x406000E0, + 0x3A39CE37, 0xD3FAF5CF, 0xABC27737, 0x5AC52D1B, 0x5CB0679E, 0x4FA33742, + 0xD3822740, 0x99BC9BBE, 0xD5118E9D, 0xBF0F7315, 0xD62D1C7E, 0xC700C47B, + 0xB78C1B6B, 0x21A19045, 0xB26EB1BE, 0x6A366EB4, 0x5748AB2F, 0xBC946E79, + 0xC6A376D2, 0x6549C2C8, 0x530FF8EE, 0x468DDE7D, 0xD5730A1D, 0x4CD04DC6, + 0x2939BBDB, 0xA9BA4650, 0xAC9526E8, 0xBE5EE304, 0xA1FAD5F0, 0x6A2D519A, + 0x63EF8CE2, 0x9A86EE22, 0xC089C2B8, 0x43242EF6, 0xA51E03AA, 0x9CF2D0A4, + 0x83C061BA, 0x9BE96A4D, 0x8FE51550, 0xBA645BD6, 0x2826A2F9, 0xA73A3AE1, + 0x4BA99586, 0xEF5562E9, 0xC72FEFD3, 0xF752F7DA, 0x3F046F69, 0x77FA0A59, + 0x80E4A915, 0x87B08601, 0x9B09E6AD, 0x3B3EE593, 0xE990FD5A, 0x9E34D797, + 
0x2CF0B7D9, 0x022B8B51, 0x96D5AC3A, 0x017DA67D, 0xD1CF3ED6, 0x7C7D2D28, + 0x1F9F25CF, 0xADF2B89B, 0x5AD6B472, 0x5A88F54C, 0xE029AC71, 0xE019A5E6, + 0x47B0ACFD, 0xED93FA9B, 0xE8D3C48D, 0x283B57CC, 0xF8D56629, 0x79132E28, + 0x785F0191, 0xED756055, 0xF7960E44, 0xE3D35E8C, 0x15056DD4, 0x88F46DBA, + 0x03A16125, 0x0564F0BD, 0xC3EB9E15, 0x3C9057A2, 0x97271AEC, 0xA93A072A, + 0x1B3F6D9B, 0x1E6321F5, 0xF59C66FB, 0x26DCF319, 0x7533D928, 0xB155FDF5, + 0x03563482, 0x8ABA3CBB, 0x28517711, 0xC20AD9F8, 0xABCC5167, 0xCCAD925F, + 0x4DE81751, 0x3830DC8E, 0x379D5862, 0x9320F991, 0xEA7A90C2, 0xFB3E7BCE, + 0x5121CE64, 0x774FBE32, 0xA8B6E37E, 0xC3293D46, 0x48DE5369, 0x6413E680, + 0xA2AE0810, 0xDD6DB224, 0x69852DFD, 0x09072166, 0xB39A460A, 0x6445C0DD, + 0x586CDECF, 0x1C20C8AE, 0x5BBEF7DD, 0x1B588D40, 0xCCD2017F, 0x6BB4E3BB, + 0xDDA26A7E, 0x3A59FF45, 0x3E350A44, 0xBCB4CDD5, 0x72EACEA8, 0xFA6484BB, + 0x8D6612AE, 0xBF3C6F47, 0xD29BE463, 0x542F5D9E, 0xAEC2771B, 0xF64E6370, + 0x740E0D8D, 0xE75B1357, 0xF8721671, 0xAF537D5D, 0x4040CB08, 0x4EB4E2CC, + 0x34D2466A, 0x0115AF84, 0xE1B00428, 0x95983A1D, 0x06B89FB4, 0xCE6EA048, + 0x6F3F3B82, 0x3520AB82, 0x011A1D4B, 0x277227F8, 0x611560B1, 0xE7933FDC, + 0xBB3A792B, 0x344525BD, 0xA08839E1, 0x51CE794B, 0x2F32C9B7, 0xA01FBAC9, + 0xE01CC87E, 0xBCC7D1F6, 0xCF0111C3, 0xA1E8AAC7, 0x1A908749, 0xD44FBD9A, + 0xD0DADECB, 0xD50ADA38, 0x0339C32A, 0xC6913667, 0x8DF9317C, 0xE0B12B4F, + 0xF79E59B7, 0x43F5BB3A, 0xF2D519FF, 0x27D9459C, 0xBF97222C, 0x15E6FC2A, + 0x0F91FC71, 0x9B941525, 0xFAE59361, 0xCEB69CEB, 0xC2A86459, 0x12BAA8D1, + 0xB6C1075E, 0xE3056A0C, 0x10D25065, 0xCB03A442, 0xE0EC6E0E, 0x1698DB3B, + 0x4C98A0BE, 0x3278E964, 0x9F1F9532, 0xE0D392DF, 0xD3A0342B, 0x8971F21E, + 0x1B0A7441, 0x4BA3348C, 0xC5BE7120, 0xC37632D8, 0xDF359F8D, 0x9B992F2E, + 0xE60B6F47, 0x0FE3F11D, 0xE54CDA54, 0x1EDAD891, 0xCE6279CF, 0xCD3E7E6F, + 0x1618B166, 0xFD2C1D05, 0x848FD2C5, 0xF6FB2299, 0xF523F357, 0xA6327623, + 0x93A83531, 0x56CCCD02, 0xACF08162, 0x5A75EBB5, 0x6E163697, 0x88D273CC, + 
0xDE966292, 0x81B949D0, 0x4C50901B, 0x71C65614, 0xE6C6C7BD, 0x327A140A, + 0x45E1D006, 0xC3F27B9A, 0xC9AA53FD, 0x62A80F00, 0xBB25BFE2, 0x35BDD2F6, + 0x71126905, 0xB2040222, 0xB6CBCF7C, 0xCD769C2B, 0x53113EC0, 0x1640E3D3, + 0x38ABBD60, 0x2547ADF0, 0xBA38209C, 0xF746CE76, 0x77AFA1C5, 0x20756060, + 0x85CBFE4E, 0x8AE88DD8, 0x7AAAF9B0, 0x4CF9AA7E, 0x1948C25C, 0x02FB8A8C, + 0x01C36AE4, 0xD6EBE1F9, 0x90D4F869, 0xA65CDEA0, 0x3F09252D, 0xC208E69F, + 0xB74E6132, 0xCE77E25B, 0x578FDFE3, 0x3AC372E6 }; + +inline uint32_t BFF(uint32_t X, const secure_vector<uint32_t>& S) + { + return ((S[ get_byte(0, X)] + S[256+get_byte(1, X)]) ^ + S[512+get_byte(2, X)]) + S[768+get_byte(3, X)]; + } + +} + +/* +* Blowfish Encryption +*/ +void Blowfish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_S.empty() == false); + + while(blocks >= 4) + { + uint32_t L0, R0, L1, R1, L2, R2, L3, R3; + load_be(in, L0, R0, L1, R1, L2, R2, L3, R3); + + for(size_t r = 0; r != 16; r += 2) + { + L0 ^= m_P[r]; + L1 ^= m_P[r]; + L2 ^= m_P[r]; + L3 ^= m_P[r]; + R0 ^= BFF(L0, m_S); + R1 ^= BFF(L1, m_S); + R2 ^= BFF(L2, m_S); + R3 ^= BFF(L3, m_S); + + R0 ^= m_P[r+1]; + R1 ^= m_P[r+1]; + R2 ^= m_P[r+1]; + R3 ^= m_P[r+1]; + L0 ^= BFF(R0, m_S); + L1 ^= BFF(R1, m_S); + L2 ^= BFF(R2, m_S); + L3 ^= BFF(R3, m_S); + } + + L0 ^= m_P[16]; R0 ^= m_P[17]; + L1 ^= m_P[16]; R1 ^= m_P[17]; + L2 ^= m_P[16]; R2 ^= m_P[17]; + L3 ^= m_P[16]; R3 ^= m_P[17]; + + store_be(out, R0, L0, R1, L1, R2, L2, R3, L3); + + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + + while(blocks) + { + uint32_t L, R; + load_be(in, L, R); + + for(size_t r = 0; r != 16; r += 2) + { + L ^= m_P[r]; + R ^= BFF(L, m_S); + + R ^= m_P[r+1]; + L ^= BFF(R, m_S); + } + + L ^= m_P[16]; R ^= m_P[17]; + + store_be(out, R, L); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + blocks--; + } + } + +/* +* Blowfish Decryption +*/ +void Blowfish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + 
verify_key_set(m_S.empty() == false); + + while(blocks >= 4) + { + uint32_t L0, R0, L1, R1, L2, R2, L3, R3; + load_be(in, L0, R0, L1, R1, L2, R2, L3, R3); + + for(size_t r = 17; r != 1; r -= 2) + { + L0 ^= m_P[r]; + L1 ^= m_P[r]; + L2 ^= m_P[r]; + L3 ^= m_P[r]; + R0 ^= BFF(L0, m_S); + R1 ^= BFF(L1, m_S); + R2 ^= BFF(L2, m_S); + R3 ^= BFF(L3, m_S); + + R0 ^= m_P[r-1]; + R1 ^= m_P[r-1]; + R2 ^= m_P[r-1]; + R3 ^= m_P[r-1]; + + L0 ^= BFF(R0, m_S); + L1 ^= BFF(R1, m_S); + L2 ^= BFF(R2, m_S); + L3 ^= BFF(R3, m_S); + } + + L0 ^= m_P[1]; R0 ^= m_P[0]; + L1 ^= m_P[1]; R1 ^= m_P[0]; + L2 ^= m_P[1]; R2 ^= m_P[0]; + L3 ^= m_P[1]; R3 ^= m_P[0]; + + store_be(out, R0, L0, R1, L1, R2, L2, R3, L3); + + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + + while(blocks) + { + uint32_t L, R; + load_be(in, L, R); + + for(size_t r = 17; r != 1; r -= 2) + { + L ^= m_P[r]; + R ^= BFF(L, m_S); + + R ^= m_P[r-1]; + L ^= BFF(R, m_S); + } + + L ^= m_P[1]; R ^= m_P[0]; + + store_be(out, R, L); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + blocks--; + } + } + +/* +* Blowfish Key Schedule +*/ +void Blowfish::key_schedule(const uint8_t key[], size_t length) + { + m_P.resize(18); + copy_mem(m_P.data(), P_INIT, 18); + + m_S.resize(1024); + copy_mem(m_S.data(), S_INIT, 1024); + + key_expansion(key, length, nullptr, 0); + } + +void Blowfish::key_expansion(const uint8_t key[], + size_t length, + const uint8_t salt[], + size_t salt_length) + { + BOTAN_ASSERT_NOMSG(salt_length % 4 == 0); + + for(size_t i = 0, j = 0; i != 18; ++i, j += 4) + m_P[i] ^= make_uint32(key[(j ) % length], key[(j+1) % length], + key[(j+2) % length], key[(j+3) % length]); + + const size_t P_salt_offset = (salt_length > 0) ? 
18 % (salt_length / 4) : 0; + + uint32_t L = 0, R = 0; + generate_sbox(m_P, L, R, salt, salt_length, 0); + generate_sbox(m_S, L, R, salt, salt_length, P_salt_offset); + } + +/* +* Modified key schedule used for bcrypt password hashing +*/ +void Blowfish::salted_set_key(const uint8_t key[], size_t length, + const uint8_t salt[], size_t salt_length, + size_t workfactor, bool salt_first) + { + BOTAN_ARG_CHECK(salt_length > 0 && salt_length % 4 == 0, + "Invalid salt length for Blowfish salted key schedule"); + + if(length > 72) + { + // Truncate longer passwords to the 72 char bcrypt limit + length = 72; + } + + m_P.resize(18); + copy_mem(m_P.data(), P_INIT, 18); + + m_S.resize(1024); + copy_mem(m_S.data(), S_INIT, 1024); + key_expansion(key, length, salt, salt_length); + + if(workfactor > 0) + { + const size_t rounds = static_cast<size_t>(1) << workfactor; + + for(size_t r = 0; r != rounds; ++r) + { + if(salt_first) + { + key_expansion(salt, salt_length, nullptr, 0); + key_expansion(key, length, nullptr, 0); + } + else + { + key_expansion(key, length, nullptr, 0); + key_expansion(salt, salt_length, nullptr, 0); + } + } + } + } + +/* +* Generate one of the Sboxes +*/ +void Blowfish::generate_sbox(secure_vector<uint32_t>& box, + uint32_t& L, uint32_t& R, + const uint8_t salt[], + size_t salt_length, + size_t salt_off) const + { + for(size_t i = 0; i != box.size(); i += 2) + { + if(salt_length > 0) + { + L ^= load_be<uint32_t>(salt, (i + salt_off) % (salt_length / 4)); + R ^= load_be<uint32_t>(salt, (i + salt_off + 1) % (salt_length / 4)); + } + + for(size_t r = 0; r != 16; r += 2) + { + L ^= m_P[r]; + R ^= BFF(L, m_S); + + R ^= m_P[r+1]; + L ^= BFF(R, m_S); + } + + uint32_t T = R; R = L ^ m_P[16]; L = T ^ m_P[17]; + box[i] = L; + box[i+1] = R; + } + } + +/* +* Clear memory of sensitive data +*/ +void Blowfish::clear() + { + zap(m_P); + zap(m_S); + } + +} diff --git a/comm/third_party/botan/src/lib/block/blowfish/blowfish.h 
b/comm/third_party/botan/src/lib/block/blowfish/blowfish.h new file mode 100644 index 0000000000..3ba39cbdbb --- /dev/null +++ b/comm/third_party/botan/src/lib/block/blowfish/blowfish.h @@ -0,0 +1,62 @@ +/* +* Blowfish +* (C) 1999-2011 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_BLOWFISH_H_ +#define BOTAN_BLOWFISH_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(blowfish.h) + +namespace Botan { + +/** +* Blowfish +*/ +class BOTAN_PUBLIC_API(2,0) Blowfish final : public Block_Cipher_Fixed_Params<8, 1, 56> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + /** + * Modified EKSBlowfish key schedule, used for bcrypt password hashing + */ + void salted_set_key(const uint8_t key[], size_t key_length, + const uint8_t salt[], size_t salt_length, + const size_t workfactor, bool salt_first = false); + + BOTAN_DEPRECATED("Use Blowfish::salted_set_key taking salt length") + void eks_key_schedule(const uint8_t key[], size_t key_length, + const uint8_t salt[16], size_t workfactor) + { + salted_set_key(key, key_length, salt, 16, workfactor); + } + + void clear() override; + std::string name() const override { return "Blowfish"; } + BlockCipher* clone() const override { return new Blowfish; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + void key_expansion(const uint8_t key[], + size_t key_length, + const uint8_t salt[], + size_t salt_length); + + void generate_sbox(secure_vector<uint32_t>& box, + uint32_t& L, uint32_t& R, + const uint8_t salt[], + size_t salt_length, + size_t salt_off) const; + + secure_vector<uint32_t> m_S, m_P; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/blowfish/info.txt b/comm/third_party/botan/src/lib/block/blowfish/info.txt new file mode 100644 index 0000000000..cc72634dfc --- 
/dev/null +++ b/comm/third_party/botan/src/lib/block/blowfish/info.txt @@ -0,0 +1,3 @@ +<defines> +BLOWFISH -> 20180718 +</defines> diff --git a/comm/third_party/botan/src/lib/block/camellia/camellia.cpp b/comm/third_party/botan/src/lib/block/camellia/camellia.cpp new file mode 100644 index 0000000000..557b3012db --- /dev/null +++ b/comm/third_party/botan/src/lib/block/camellia/camellia.cpp @@ -0,0 +1,924 @@ +/* +* Camellia +* (C) 2012 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/camellia.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +namespace { + +alignas(64) const uint64_t Camellia_SBOX1[256] = { +0x7070700070000070, 0x8282820082000082, 0x2C2C2C002C00002C, 0xECECEC00EC0000EC, +0xB3B3B300B30000B3, 0x2727270027000027, 0xC0C0C000C00000C0, 0xE5E5E500E50000E5, +0xE4E4E400E40000E4, 0x8585850085000085, 0x5757570057000057, 0x3535350035000035, +0xEAEAEA00EA0000EA, 0x0C0C0C000C00000C, 0xAEAEAE00AE0000AE, 0x4141410041000041, +0x2323230023000023, 0xEFEFEF00EF0000EF, 0x6B6B6B006B00006B, 0x9393930093000093, +0x4545450045000045, 0x1919190019000019, 0xA5A5A500A50000A5, 0x2121210021000021, +0xEDEDED00ED0000ED, 0x0E0E0E000E00000E, 0x4F4F4F004F00004F, 0x4E4E4E004E00004E, +0x1D1D1D001D00001D, 0x6565650065000065, 0x9292920092000092, 0xBDBDBD00BD0000BD, +0x8686860086000086, 0xB8B8B800B80000B8, 0xAFAFAF00AF0000AF, 0x8F8F8F008F00008F, +0x7C7C7C007C00007C, 0xEBEBEB00EB0000EB, 0x1F1F1F001F00001F, 0xCECECE00CE0000CE, +0x3E3E3E003E00003E, 0x3030300030000030, 0xDCDCDC00DC0000DC, 0x5F5F5F005F00005F, +0x5E5E5E005E00005E, 0xC5C5C500C50000C5, 0x0B0B0B000B00000B, 0x1A1A1A001A00001A, +0xA6A6A600A60000A6, 0xE1E1E100E10000E1, 0x3939390039000039, 0xCACACA00CA0000CA, +0xD5D5D500D50000D5, 0x4747470047000047, 0x5D5D5D005D00005D, 0x3D3D3D003D00003D, +0xD9D9D900D90000D9, 0x0101010001000001, 0x5A5A5A005A00005A, 0xD6D6D600D60000D6, +0x5151510051000051, 0x5656560056000056, 0x6C6C6C006C00006C, 
0x4D4D4D004D00004D, +0x8B8B8B008B00008B, 0x0D0D0D000D00000D, 0x9A9A9A009A00009A, 0x6666660066000066, +0xFBFBFB00FB0000FB, 0xCCCCCC00CC0000CC, 0xB0B0B000B00000B0, 0x2D2D2D002D00002D, +0x7474740074000074, 0x1212120012000012, 0x2B2B2B002B00002B, 0x2020200020000020, +0xF0F0F000F00000F0, 0xB1B1B100B10000B1, 0x8484840084000084, 0x9999990099000099, +0xDFDFDF00DF0000DF, 0x4C4C4C004C00004C, 0xCBCBCB00CB0000CB, 0xC2C2C200C20000C2, +0x3434340034000034, 0x7E7E7E007E00007E, 0x7676760076000076, 0x0505050005000005, +0x6D6D6D006D00006D, 0xB7B7B700B70000B7, 0xA9A9A900A90000A9, 0x3131310031000031, +0xD1D1D100D10000D1, 0x1717170017000017, 0x0404040004000004, 0xD7D7D700D70000D7, +0x1414140014000014, 0x5858580058000058, 0x3A3A3A003A00003A, 0x6161610061000061, +0xDEDEDE00DE0000DE, 0x1B1B1B001B00001B, 0x1111110011000011, 0x1C1C1C001C00001C, +0x3232320032000032, 0x0F0F0F000F00000F, 0x9C9C9C009C00009C, 0x1616160016000016, +0x5353530053000053, 0x1818180018000018, 0xF2F2F200F20000F2, 0x2222220022000022, +0xFEFEFE00FE0000FE, 0x4444440044000044, 0xCFCFCF00CF0000CF, 0xB2B2B200B20000B2, +0xC3C3C300C30000C3, 0xB5B5B500B50000B5, 0x7A7A7A007A00007A, 0x9191910091000091, +0x2424240024000024, 0x0808080008000008, 0xE8E8E800E80000E8, 0xA8A8A800A80000A8, +0x6060600060000060, 0xFCFCFC00FC0000FC, 0x6969690069000069, 0x5050500050000050, +0xAAAAAA00AA0000AA, 0xD0D0D000D00000D0, 0xA0A0A000A00000A0, 0x7D7D7D007D00007D, +0xA1A1A100A10000A1, 0x8989890089000089, 0x6262620062000062, 0x9797970097000097, +0x5454540054000054, 0x5B5B5B005B00005B, 0x1E1E1E001E00001E, 0x9595950095000095, +0xE0E0E000E00000E0, 0xFFFFFF00FF0000FF, 0x6464640064000064, 0xD2D2D200D20000D2, +0x1010100010000010, 0xC4C4C400C40000C4, 0x0000000000000000, 0x4848480048000048, +0xA3A3A300A30000A3, 0xF7F7F700F70000F7, 0x7575750075000075, 0xDBDBDB00DB0000DB, +0x8A8A8A008A00008A, 0x0303030003000003, 0xE6E6E600E60000E6, 0xDADADA00DA0000DA, +0x0909090009000009, 0x3F3F3F003F00003F, 0xDDDDDD00DD0000DD, 0x9494940094000094, +0x8787870087000087, 
0x5C5C5C005C00005C, 0x8383830083000083, 0x0202020002000002, +0xCDCDCD00CD0000CD, 0x4A4A4A004A00004A, 0x9090900090000090, 0x3333330033000033, +0x7373730073000073, 0x6767670067000067, 0xF6F6F600F60000F6, 0xF3F3F300F30000F3, +0x9D9D9D009D00009D, 0x7F7F7F007F00007F, 0xBFBFBF00BF0000BF, 0xE2E2E200E20000E2, +0x5252520052000052, 0x9B9B9B009B00009B, 0xD8D8D800D80000D8, 0x2626260026000026, +0xC8C8C800C80000C8, 0x3737370037000037, 0xC6C6C600C60000C6, 0x3B3B3B003B00003B, +0x8181810081000081, 0x9696960096000096, 0x6F6F6F006F00006F, 0x4B4B4B004B00004B, +0x1313130013000013, 0xBEBEBE00BE0000BE, 0x6363630063000063, 0x2E2E2E002E00002E, +0xE9E9E900E90000E9, 0x7979790079000079, 0xA7A7A700A70000A7, 0x8C8C8C008C00008C, +0x9F9F9F009F00009F, 0x6E6E6E006E00006E, 0xBCBCBC00BC0000BC, 0x8E8E8E008E00008E, +0x2929290029000029, 0xF5F5F500F50000F5, 0xF9F9F900F90000F9, 0xB6B6B600B60000B6, +0x2F2F2F002F00002F, 0xFDFDFD00FD0000FD, 0xB4B4B400B40000B4, 0x5959590059000059, +0x7878780078000078, 0x9898980098000098, 0x0606060006000006, 0x6A6A6A006A00006A, +0xE7E7E700E70000E7, 0x4646460046000046, 0x7171710071000071, 0xBABABA00BA0000BA, +0xD4D4D400D40000D4, 0x2525250025000025, 0xABABAB00AB0000AB, 0x4242420042000042, +0x8888880088000088, 0xA2A2A200A20000A2, 0x8D8D8D008D00008D, 0xFAFAFA00FA0000FA, +0x7272720072000072, 0x0707070007000007, 0xB9B9B900B90000B9, 0x5555550055000055, +0xF8F8F800F80000F8, 0xEEEEEE00EE0000EE, 0xACACAC00AC0000AC, 0x0A0A0A000A00000A, +0x3636360036000036, 0x4949490049000049, 0x2A2A2A002A00002A, 0x6868680068000068, +0x3C3C3C003C00003C, 0x3838380038000038, 0xF1F1F100F10000F1, 0xA4A4A400A40000A4, +0x4040400040000040, 0x2828280028000028, 0xD3D3D300D30000D3, 0x7B7B7B007B00007B, +0xBBBBBB00BB0000BB, 0xC9C9C900C90000C9, 0x4343430043000043, 0xC1C1C100C10000C1, +0x1515150015000015, 0xE3E3E300E30000E3, 0xADADAD00AD0000AD, 0xF4F4F400F40000F4, +0x7777770077000077, 0xC7C7C700C70000C7, 0x8080800080000080, 0x9E9E9E009E00009E }; + +alignas(64) const uint64_t Camellia_SBOX2[256] = { +0x00E0E0E0E0E00000, 
0x0005050505050000, 0x0058585858580000, 0x00D9D9D9D9D90000, +0x0067676767670000, 0x004E4E4E4E4E0000, 0x0081818181810000, 0x00CBCBCBCBCB0000, +0x00C9C9C9C9C90000, 0x000B0B0B0B0B0000, 0x00AEAEAEAEAE0000, 0x006A6A6A6A6A0000, +0x00D5D5D5D5D50000, 0x0018181818180000, 0x005D5D5D5D5D0000, 0x0082828282820000, +0x0046464646460000, 0x00DFDFDFDFDF0000, 0x00D6D6D6D6D60000, 0x0027272727270000, +0x008A8A8A8A8A0000, 0x0032323232320000, 0x004B4B4B4B4B0000, 0x0042424242420000, +0x00DBDBDBDBDB0000, 0x001C1C1C1C1C0000, 0x009E9E9E9E9E0000, 0x009C9C9C9C9C0000, +0x003A3A3A3A3A0000, 0x00CACACACACA0000, 0x0025252525250000, 0x007B7B7B7B7B0000, +0x000D0D0D0D0D0000, 0x0071717171710000, 0x005F5F5F5F5F0000, 0x001F1F1F1F1F0000, +0x00F8F8F8F8F80000, 0x00D7D7D7D7D70000, 0x003E3E3E3E3E0000, 0x009D9D9D9D9D0000, +0x007C7C7C7C7C0000, 0x0060606060600000, 0x00B9B9B9B9B90000, 0x00BEBEBEBEBE0000, +0x00BCBCBCBCBC0000, 0x008B8B8B8B8B0000, 0x0016161616160000, 0x0034343434340000, +0x004D4D4D4D4D0000, 0x00C3C3C3C3C30000, 0x0072727272720000, 0x0095959595950000, +0x00ABABABABAB0000, 0x008E8E8E8E8E0000, 0x00BABABABABA0000, 0x007A7A7A7A7A0000, +0x00B3B3B3B3B30000, 0x0002020202020000, 0x00B4B4B4B4B40000, 0x00ADADADADAD0000, +0x00A2A2A2A2A20000, 0x00ACACACACAC0000, 0x00D8D8D8D8D80000, 0x009A9A9A9A9A0000, +0x0017171717170000, 0x001A1A1A1A1A0000, 0x0035353535350000, 0x00CCCCCCCCCC0000, +0x00F7F7F7F7F70000, 0x0099999999990000, 0x0061616161610000, 0x005A5A5A5A5A0000, +0x00E8E8E8E8E80000, 0x0024242424240000, 0x0056565656560000, 0x0040404040400000, +0x00E1E1E1E1E10000, 0x0063636363630000, 0x0009090909090000, 0x0033333333330000, +0x00BFBFBFBFBF0000, 0x0098989898980000, 0x0097979797970000, 0x0085858585850000, +0x0068686868680000, 0x00FCFCFCFCFC0000, 0x00ECECECECEC0000, 0x000A0A0A0A0A0000, +0x00DADADADADA0000, 0x006F6F6F6F6F0000, 0x0053535353530000, 0x0062626262620000, +0x00A3A3A3A3A30000, 0x002E2E2E2E2E0000, 0x0008080808080000, 0x00AFAFAFAFAF0000, +0x0028282828280000, 0x00B0B0B0B0B00000, 0x0074747474740000, 
0x00C2C2C2C2C20000, +0x00BDBDBDBDBD0000, 0x0036363636360000, 0x0022222222220000, 0x0038383838380000, +0x0064646464640000, 0x001E1E1E1E1E0000, 0x0039393939390000, 0x002C2C2C2C2C0000, +0x00A6A6A6A6A60000, 0x0030303030300000, 0x00E5E5E5E5E50000, 0x0044444444440000, +0x00FDFDFDFDFD0000, 0x0088888888880000, 0x009F9F9F9F9F0000, 0x0065656565650000, +0x0087878787870000, 0x006B6B6B6B6B0000, 0x00F4F4F4F4F40000, 0x0023232323230000, +0x0048484848480000, 0x0010101010100000, 0x00D1D1D1D1D10000, 0x0051515151510000, +0x00C0C0C0C0C00000, 0x00F9F9F9F9F90000, 0x00D2D2D2D2D20000, 0x00A0A0A0A0A00000, +0x0055555555550000, 0x00A1A1A1A1A10000, 0x0041414141410000, 0x00FAFAFAFAFA0000, +0x0043434343430000, 0x0013131313130000, 0x00C4C4C4C4C40000, 0x002F2F2F2F2F0000, +0x00A8A8A8A8A80000, 0x00B6B6B6B6B60000, 0x003C3C3C3C3C0000, 0x002B2B2B2B2B0000, +0x00C1C1C1C1C10000, 0x00FFFFFFFFFF0000, 0x00C8C8C8C8C80000, 0x00A5A5A5A5A50000, +0x0020202020200000, 0x0089898989890000, 0x0000000000000000, 0x0090909090900000, +0x0047474747470000, 0x00EFEFEFEFEF0000, 0x00EAEAEAEAEA0000, 0x00B7B7B7B7B70000, +0x0015151515150000, 0x0006060606060000, 0x00CDCDCDCDCD0000, 0x00B5B5B5B5B50000, +0x0012121212120000, 0x007E7E7E7E7E0000, 0x00BBBBBBBBBB0000, 0x0029292929290000, +0x000F0F0F0F0F0000, 0x00B8B8B8B8B80000, 0x0007070707070000, 0x0004040404040000, +0x009B9B9B9B9B0000, 0x0094949494940000, 0x0021212121210000, 0x0066666666660000, +0x00E6E6E6E6E60000, 0x00CECECECECE0000, 0x00EDEDEDEDED0000, 0x00E7E7E7E7E70000, +0x003B3B3B3B3B0000, 0x00FEFEFEFEFE0000, 0x007F7F7F7F7F0000, 0x00C5C5C5C5C50000, +0x00A4A4A4A4A40000, 0x0037373737370000, 0x00B1B1B1B1B10000, 0x004C4C4C4C4C0000, +0x0091919191910000, 0x006E6E6E6E6E0000, 0x008D8D8D8D8D0000, 0x0076767676760000, +0x0003030303030000, 0x002D2D2D2D2D0000, 0x00DEDEDEDEDE0000, 0x0096969696960000, +0x0026262626260000, 0x007D7D7D7D7D0000, 0x00C6C6C6C6C60000, 0x005C5C5C5C5C0000, +0x00D3D3D3D3D30000, 0x00F2F2F2F2F20000, 0x004F4F4F4F4F0000, 0x0019191919190000, +0x003F3F3F3F3F0000, 
0x00DCDCDCDCDC0000, 0x0079797979790000, 0x001D1D1D1D1D0000, +0x0052525252520000, 0x00EBEBEBEBEB0000, 0x00F3F3F3F3F30000, 0x006D6D6D6D6D0000, +0x005E5E5E5E5E0000, 0x00FBFBFBFBFB0000, 0x0069696969690000, 0x00B2B2B2B2B20000, +0x00F0F0F0F0F00000, 0x0031313131310000, 0x000C0C0C0C0C0000, 0x00D4D4D4D4D40000, +0x00CFCFCFCFCF0000, 0x008C8C8C8C8C0000, 0x00E2E2E2E2E20000, 0x0075757575750000, +0x00A9A9A9A9A90000, 0x004A4A4A4A4A0000, 0x0057575757570000, 0x0084848484840000, +0x0011111111110000, 0x0045454545450000, 0x001B1B1B1B1B0000, 0x00F5F5F5F5F50000, +0x00E4E4E4E4E40000, 0x000E0E0E0E0E0000, 0x0073737373730000, 0x00AAAAAAAAAA0000, +0x00F1F1F1F1F10000, 0x00DDDDDDDDDD0000, 0x0059595959590000, 0x0014141414140000, +0x006C6C6C6C6C0000, 0x0092929292920000, 0x0054545454540000, 0x00D0D0D0D0D00000, +0x0078787878780000, 0x0070707070700000, 0x00E3E3E3E3E30000, 0x0049494949490000, +0x0080808080800000, 0x0050505050500000, 0x00A7A7A7A7A70000, 0x00F6F6F6F6F60000, +0x0077777777770000, 0x0093939393930000, 0x0086868686860000, 0x0083838383830000, +0x002A2A2A2A2A0000, 0x00C7C7C7C7C70000, 0x005B5B5B5B5B0000, 0x00E9E9E9E9E90000, +0x00EEEEEEEEEE0000, 0x008F8F8F8F8F0000, 0x0001010101010000, 0x003D3D3D3D3D0000 }; + +alignas(64) const uint64_t Camellia_SBOX3[256] = { +0x3800383800383800, 0x4100414100414100, 0x1600161600161600, 0x7600767600767600, +0xD900D9D900D9D900, 0x9300939300939300, 0x6000606000606000, 0xF200F2F200F2F200, +0x7200727200727200, 0xC200C2C200C2C200, 0xAB00ABAB00ABAB00, 0x9A009A9A009A9A00, +0x7500757500757500, 0x0600060600060600, 0x5700575700575700, 0xA000A0A000A0A000, +0x9100919100919100, 0xF700F7F700F7F700, 0xB500B5B500B5B500, 0xC900C9C900C9C900, +0xA200A2A200A2A200, 0x8C008C8C008C8C00, 0xD200D2D200D2D200, 0x9000909000909000, +0xF600F6F600F6F600, 0x0700070700070700, 0xA700A7A700A7A700, 0x2700272700272700, +0x8E008E8E008E8E00, 0xB200B2B200B2B200, 0x4900494900494900, 0xDE00DEDE00DEDE00, +0x4300434300434300, 0x5C005C5C005C5C00, 0xD700D7D700D7D700, 0xC700C7C700C7C700, +0x3E003E3E003E3E00, 
0xF500F5F500F5F500, 0x8F008F8F008F8F00, 0x6700676700676700, +0x1F001F1F001F1F00, 0x1800181800181800, 0x6E006E6E006E6E00, 0xAF00AFAF00AFAF00, +0x2F002F2F002F2F00, 0xE200E2E200E2E200, 0x8500858500858500, 0x0D000D0D000D0D00, +0x5300535300535300, 0xF000F0F000F0F000, 0x9C009C9C009C9C00, 0x6500656500656500, +0xEA00EAEA00EAEA00, 0xA300A3A300A3A300, 0xAE00AEAE00AEAE00, 0x9E009E9E009E9E00, +0xEC00ECEC00ECEC00, 0x8000808000808000, 0x2D002D2D002D2D00, 0x6B006B6B006B6B00, +0xA800A8A800A8A800, 0x2B002B2B002B2B00, 0x3600363600363600, 0xA600A6A600A6A600, +0xC500C5C500C5C500, 0x8600868600868600, 0x4D004D4D004D4D00, 0x3300333300333300, +0xFD00FDFD00FDFD00, 0x6600666600666600, 0x5800585800585800, 0x9600969600969600, +0x3A003A3A003A3A00, 0x0900090900090900, 0x9500959500959500, 0x1000101000101000, +0x7800787800787800, 0xD800D8D800D8D800, 0x4200424200424200, 0xCC00CCCC00CCCC00, +0xEF00EFEF00EFEF00, 0x2600262600262600, 0xE500E5E500E5E500, 0x6100616100616100, +0x1A001A1A001A1A00, 0x3F003F3F003F3F00, 0x3B003B3B003B3B00, 0x8200828200828200, +0xB600B6B600B6B600, 0xDB00DBDB00DBDB00, 0xD400D4D400D4D400, 0x9800989800989800, +0xE800E8E800E8E800, 0x8B008B8B008B8B00, 0x0200020200020200, 0xEB00EBEB00EBEB00, +0x0A000A0A000A0A00, 0x2C002C2C002C2C00, 0x1D001D1D001D1D00, 0xB000B0B000B0B000, +0x6F006F6F006F6F00, 0x8D008D8D008D8D00, 0x8800888800888800, 0x0E000E0E000E0E00, +0x1900191900191900, 0x8700878700878700, 0x4E004E4E004E4E00, 0x0B000B0B000B0B00, +0xA900A9A900A9A900, 0x0C000C0C000C0C00, 0x7900797900797900, 0x1100111100111100, +0x7F007F7F007F7F00, 0x2200222200222200, 0xE700E7E700E7E700, 0x5900595900595900, +0xE100E1E100E1E100, 0xDA00DADA00DADA00, 0x3D003D3D003D3D00, 0xC800C8C800C8C800, +0x1200121200121200, 0x0400040400040400, 0x7400747400747400, 0x5400545400545400, +0x3000303000303000, 0x7E007E7E007E7E00, 0xB400B4B400B4B400, 0x2800282800282800, +0x5500555500555500, 0x6800686800686800, 0x5000505000505000, 0xBE00BEBE00BEBE00, +0xD000D0D000D0D000, 0xC400C4C400C4C400, 0x3100313100313100, 
0xCB00CBCB00CBCB00, +0x2A002A2A002A2A00, 0xAD00ADAD00ADAD00, 0x0F000F0F000F0F00, 0xCA00CACA00CACA00, +0x7000707000707000, 0xFF00FFFF00FFFF00, 0x3200323200323200, 0x6900696900696900, +0x0800080800080800, 0x6200626200626200, 0x0000000000000000, 0x2400242400242400, +0xD100D1D100D1D100, 0xFB00FBFB00FBFB00, 0xBA00BABA00BABA00, 0xED00EDED00EDED00, +0x4500454500454500, 0x8100818100818100, 0x7300737300737300, 0x6D006D6D006D6D00, +0x8400848400848400, 0x9F009F9F009F9F00, 0xEE00EEEE00EEEE00, 0x4A004A4A004A4A00, +0xC300C3C300C3C300, 0x2E002E2E002E2E00, 0xC100C1C100C1C100, 0x0100010100010100, +0xE600E6E600E6E600, 0x2500252500252500, 0x4800484800484800, 0x9900999900999900, +0xB900B9B900B9B900, 0xB300B3B300B3B300, 0x7B007B7B007B7B00, 0xF900F9F900F9F900, +0xCE00CECE00CECE00, 0xBF00BFBF00BFBF00, 0xDF00DFDF00DFDF00, 0x7100717100717100, +0x2900292900292900, 0xCD00CDCD00CDCD00, 0x6C006C6C006C6C00, 0x1300131300131300, +0x6400646400646400, 0x9B009B9B009B9B00, 0x6300636300636300, 0x9D009D9D009D9D00, +0xC000C0C000C0C000, 0x4B004B4B004B4B00, 0xB700B7B700B7B700, 0xA500A5A500A5A500, +0x8900898900898900, 0x5F005F5F005F5F00, 0xB100B1B100B1B100, 0x1700171700171700, +0xF400F4F400F4F400, 0xBC00BCBC00BCBC00, 0xD300D3D300D3D300, 0x4600464600464600, +0xCF00CFCF00CFCF00, 0x3700373700373700, 0x5E005E5E005E5E00, 0x4700474700474700, +0x9400949400949400, 0xFA00FAFA00FAFA00, 0xFC00FCFC00FCFC00, 0x5B005B5B005B5B00, +0x9700979700979700, 0xFE00FEFE00FEFE00, 0x5A005A5A005A5A00, 0xAC00ACAC00ACAC00, +0x3C003C3C003C3C00, 0x4C004C4C004C4C00, 0x0300030300030300, 0x3500353500353500, +0xF300F3F300F3F300, 0x2300232300232300, 0xB800B8B800B8B800, 0x5D005D5D005D5D00, +0x6A006A6A006A6A00, 0x9200929200929200, 0xD500D5D500D5D500, 0x2100212100212100, +0x4400444400444400, 0x5100515100515100, 0xC600C6C600C6C600, 0x7D007D7D007D7D00, +0x3900393900393900, 0x8300838300838300, 0xDC00DCDC00DCDC00, 0xAA00AAAA00AAAA00, +0x7C007C7C007C7C00, 0x7700777700777700, 0x5600565600565600, 0x0500050500050500, +0x1B001B1B001B1B00, 
0xA400A4A400A4A400, 0x1500151500151500, 0x3400343400343400, +0x1E001E1E001E1E00, 0x1C001C1C001C1C00, 0xF800F8F800F8F800, 0x5200525200525200, +0x2000202000202000, 0x1400141400141400, 0xE900E9E900E9E900, 0xBD00BDBD00BDBD00, +0xDD00DDDD00DDDD00, 0xE400E4E400E4E400, 0xA100A1A100A1A100, 0xE000E0E000E0E000, +0x8A008A8A008A8A00, 0xF100F1F100F1F100, 0xD600D6D600D6D600, 0x7A007A7A007A7A00, +0xBB00BBBB00BBBB00, 0xE300E3E300E3E300, 0x4000404000404000, 0x4F004F4F004F4F00 }; + +alignas(64) const uint64_t Camellia_SBOX4[256] = { +0x7070007000007070, 0x2C2C002C00002C2C, 0xB3B300B30000B3B3, 0xC0C000C00000C0C0, +0xE4E400E40000E4E4, 0x5757005700005757, 0xEAEA00EA0000EAEA, 0xAEAE00AE0000AEAE, +0x2323002300002323, 0x6B6B006B00006B6B, 0x4545004500004545, 0xA5A500A50000A5A5, +0xEDED00ED0000EDED, 0x4F4F004F00004F4F, 0x1D1D001D00001D1D, 0x9292009200009292, +0x8686008600008686, 0xAFAF00AF0000AFAF, 0x7C7C007C00007C7C, 0x1F1F001F00001F1F, +0x3E3E003E00003E3E, 0xDCDC00DC0000DCDC, 0x5E5E005E00005E5E, 0x0B0B000B00000B0B, +0xA6A600A60000A6A6, 0x3939003900003939, 0xD5D500D50000D5D5, 0x5D5D005D00005D5D, +0xD9D900D90000D9D9, 0x5A5A005A00005A5A, 0x5151005100005151, 0x6C6C006C00006C6C, +0x8B8B008B00008B8B, 0x9A9A009A00009A9A, 0xFBFB00FB0000FBFB, 0xB0B000B00000B0B0, +0x7474007400007474, 0x2B2B002B00002B2B, 0xF0F000F00000F0F0, 0x8484008400008484, +0xDFDF00DF0000DFDF, 0xCBCB00CB0000CBCB, 0x3434003400003434, 0x7676007600007676, +0x6D6D006D00006D6D, 0xA9A900A90000A9A9, 0xD1D100D10000D1D1, 0x0404000400000404, +0x1414001400001414, 0x3A3A003A00003A3A, 0xDEDE00DE0000DEDE, 0x1111001100001111, +0x3232003200003232, 0x9C9C009C00009C9C, 0x5353005300005353, 0xF2F200F20000F2F2, +0xFEFE00FE0000FEFE, 0xCFCF00CF0000CFCF, 0xC3C300C30000C3C3, 0x7A7A007A00007A7A, +0x2424002400002424, 0xE8E800E80000E8E8, 0x6060006000006060, 0x6969006900006969, +0xAAAA00AA0000AAAA, 0xA0A000A00000A0A0, 0xA1A100A10000A1A1, 0x6262006200006262, +0x5454005400005454, 0x1E1E001E00001E1E, 0xE0E000E00000E0E0, 0x6464006400006464, +0x1010001000001010, 
0x0000000000000000, 0xA3A300A30000A3A3, 0x7575007500007575, +0x8A8A008A00008A8A, 0xE6E600E60000E6E6, 0x0909000900000909, 0xDDDD00DD0000DDDD, +0x8787008700008787, 0x8383008300008383, 0xCDCD00CD0000CDCD, 0x9090009000009090, +0x7373007300007373, 0xF6F600F60000F6F6, 0x9D9D009D00009D9D, 0xBFBF00BF0000BFBF, +0x5252005200005252, 0xD8D800D80000D8D8, 0xC8C800C80000C8C8, 0xC6C600C60000C6C6, +0x8181008100008181, 0x6F6F006F00006F6F, 0x1313001300001313, 0x6363006300006363, +0xE9E900E90000E9E9, 0xA7A700A70000A7A7, 0x9F9F009F00009F9F, 0xBCBC00BC0000BCBC, +0x2929002900002929, 0xF9F900F90000F9F9, 0x2F2F002F00002F2F, 0xB4B400B40000B4B4, +0x7878007800007878, 0x0606000600000606, 0xE7E700E70000E7E7, 0x7171007100007171, +0xD4D400D40000D4D4, 0xABAB00AB0000ABAB, 0x8888008800008888, 0x8D8D008D00008D8D, +0x7272007200007272, 0xB9B900B90000B9B9, 0xF8F800F80000F8F8, 0xACAC00AC0000ACAC, +0x3636003600003636, 0x2A2A002A00002A2A, 0x3C3C003C00003C3C, 0xF1F100F10000F1F1, +0x4040004000004040, 0xD3D300D30000D3D3, 0xBBBB00BB0000BBBB, 0x4343004300004343, +0x1515001500001515, 0xADAD00AD0000ADAD, 0x7777007700007777, 0x8080008000008080, +0x8282008200008282, 0xECEC00EC0000ECEC, 0x2727002700002727, 0xE5E500E50000E5E5, +0x8585008500008585, 0x3535003500003535, 0x0C0C000C00000C0C, 0x4141004100004141, +0xEFEF00EF0000EFEF, 0x9393009300009393, 0x1919001900001919, 0x2121002100002121, +0x0E0E000E00000E0E, 0x4E4E004E00004E4E, 0x6565006500006565, 0xBDBD00BD0000BDBD, +0xB8B800B80000B8B8, 0x8F8F008F00008F8F, 0xEBEB00EB0000EBEB, 0xCECE00CE0000CECE, +0x3030003000003030, 0x5F5F005F00005F5F, 0xC5C500C50000C5C5, 0x1A1A001A00001A1A, +0xE1E100E10000E1E1, 0xCACA00CA0000CACA, 0x4747004700004747, 0x3D3D003D00003D3D, +0x0101000100000101, 0xD6D600D60000D6D6, 0x5656005600005656, 0x4D4D004D00004D4D, +0x0D0D000D00000D0D, 0x6666006600006666, 0xCCCC00CC0000CCCC, 0x2D2D002D00002D2D, +0x1212001200001212, 0x2020002000002020, 0xB1B100B10000B1B1, 0x9999009900009999, +0x4C4C004C00004C4C, 0xC2C200C20000C2C2, 0x7E7E007E00007E7E, 
0x0505000500000505, +0xB7B700B70000B7B7, 0x3131003100003131, 0x1717001700001717, 0xD7D700D70000D7D7, +0x5858005800005858, 0x6161006100006161, 0x1B1B001B00001B1B, 0x1C1C001C00001C1C, +0x0F0F000F00000F0F, 0x1616001600001616, 0x1818001800001818, 0x2222002200002222, +0x4444004400004444, 0xB2B200B20000B2B2, 0xB5B500B50000B5B5, 0x9191009100009191, +0x0808000800000808, 0xA8A800A80000A8A8, 0xFCFC00FC0000FCFC, 0x5050005000005050, +0xD0D000D00000D0D0, 0x7D7D007D00007D7D, 0x8989008900008989, 0x9797009700009797, +0x5B5B005B00005B5B, 0x9595009500009595, 0xFFFF00FF0000FFFF, 0xD2D200D20000D2D2, +0xC4C400C40000C4C4, 0x4848004800004848, 0xF7F700F70000F7F7, 0xDBDB00DB0000DBDB, +0x0303000300000303, 0xDADA00DA0000DADA, 0x3F3F003F00003F3F, 0x9494009400009494, +0x5C5C005C00005C5C, 0x0202000200000202, 0x4A4A004A00004A4A, 0x3333003300003333, +0x6767006700006767, 0xF3F300F30000F3F3, 0x7F7F007F00007F7F, 0xE2E200E20000E2E2, +0x9B9B009B00009B9B, 0x2626002600002626, 0x3737003700003737, 0x3B3B003B00003B3B, +0x9696009600009696, 0x4B4B004B00004B4B, 0xBEBE00BE0000BEBE, 0x2E2E002E00002E2E, +0x7979007900007979, 0x8C8C008C00008C8C, 0x6E6E006E00006E6E, 0x8E8E008E00008E8E, +0xF5F500F50000F5F5, 0xB6B600B60000B6B6, 0xFDFD00FD0000FDFD, 0x5959005900005959, +0x9898009800009898, 0x6A6A006A00006A6A, 0x4646004600004646, 0xBABA00BA0000BABA, +0x2525002500002525, 0x4242004200004242, 0xA2A200A20000A2A2, 0xFAFA00FA0000FAFA, +0x0707000700000707, 0x5555005500005555, 0xEEEE00EE0000EEEE, 0x0A0A000A00000A0A, +0x4949004900004949, 0x6868006800006868, 0x3838003800003838, 0xA4A400A40000A4A4, +0x2828002800002828, 0x7B7B007B00007B7B, 0xC9C900C90000C9C9, 0xC1C100C10000C1C1, +0xE3E300E30000E3E3, 0xF4F400F40000F4F4, 0xC7C700C70000C7C7, 0x9E9E009E00009E9E }; + +alignas(64) const uint64_t Camellia_SBOX5[256] = { +0x00E0E0E000E0E0E0, 0x0005050500050505, 0x0058585800585858, 0x00D9D9D900D9D9D9, +0x0067676700676767, 0x004E4E4E004E4E4E, 0x0081818100818181, 0x00CBCBCB00CBCBCB, +0x00C9C9C900C9C9C9, 0x000B0B0B000B0B0B, 0x00AEAEAE00AEAEAE, 
0x006A6A6A006A6A6A, +0x00D5D5D500D5D5D5, 0x0018181800181818, 0x005D5D5D005D5D5D, 0x0082828200828282, +0x0046464600464646, 0x00DFDFDF00DFDFDF, 0x00D6D6D600D6D6D6, 0x0027272700272727, +0x008A8A8A008A8A8A, 0x0032323200323232, 0x004B4B4B004B4B4B, 0x0042424200424242, +0x00DBDBDB00DBDBDB, 0x001C1C1C001C1C1C, 0x009E9E9E009E9E9E, 0x009C9C9C009C9C9C, +0x003A3A3A003A3A3A, 0x00CACACA00CACACA, 0x0025252500252525, 0x007B7B7B007B7B7B, +0x000D0D0D000D0D0D, 0x0071717100717171, 0x005F5F5F005F5F5F, 0x001F1F1F001F1F1F, +0x00F8F8F800F8F8F8, 0x00D7D7D700D7D7D7, 0x003E3E3E003E3E3E, 0x009D9D9D009D9D9D, +0x007C7C7C007C7C7C, 0x0060606000606060, 0x00B9B9B900B9B9B9, 0x00BEBEBE00BEBEBE, +0x00BCBCBC00BCBCBC, 0x008B8B8B008B8B8B, 0x0016161600161616, 0x0034343400343434, +0x004D4D4D004D4D4D, 0x00C3C3C300C3C3C3, 0x0072727200727272, 0x0095959500959595, +0x00ABABAB00ABABAB, 0x008E8E8E008E8E8E, 0x00BABABA00BABABA, 0x007A7A7A007A7A7A, +0x00B3B3B300B3B3B3, 0x0002020200020202, 0x00B4B4B400B4B4B4, 0x00ADADAD00ADADAD, +0x00A2A2A200A2A2A2, 0x00ACACAC00ACACAC, 0x00D8D8D800D8D8D8, 0x009A9A9A009A9A9A, +0x0017171700171717, 0x001A1A1A001A1A1A, 0x0035353500353535, 0x00CCCCCC00CCCCCC, +0x00F7F7F700F7F7F7, 0x0099999900999999, 0x0061616100616161, 0x005A5A5A005A5A5A, +0x00E8E8E800E8E8E8, 0x0024242400242424, 0x0056565600565656, 0x0040404000404040, +0x00E1E1E100E1E1E1, 0x0063636300636363, 0x0009090900090909, 0x0033333300333333, +0x00BFBFBF00BFBFBF, 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, +0x0068686800686868, 0x00FCFCFC00FCFCFC, 0x00ECECEC00ECECEC, 0x000A0A0A000A0A0A, +0x00DADADA00DADADA, 0x006F6F6F006F6F6F, 0x0053535300535353, 0x0062626200626262, +0x00A3A3A300A3A3A3, 0x002E2E2E002E2E2E, 0x0008080800080808, 0x00AFAFAF00AFAFAF, +0x0028282800282828, 0x00B0B0B000B0B0B0, 0x0074747400747474, 0x00C2C2C200C2C2C2, +0x00BDBDBD00BDBDBD, 0x0036363600363636, 0x0022222200222222, 0x0038383800383838, +0x0064646400646464, 0x001E1E1E001E1E1E, 0x0039393900393939, 0x002C2C2C002C2C2C, +0x00A6A6A600A6A6A6, 
0x0030303000303030, 0x00E5E5E500E5E5E5, 0x0044444400444444, +0x00FDFDFD00FDFDFD, 0x0088888800888888, 0x009F9F9F009F9F9F, 0x0065656500656565, +0x0087878700878787, 0x006B6B6B006B6B6B, 0x00F4F4F400F4F4F4, 0x0023232300232323, +0x0048484800484848, 0x0010101000101010, 0x00D1D1D100D1D1D1, 0x0051515100515151, +0x00C0C0C000C0C0C0, 0x00F9F9F900F9F9F9, 0x00D2D2D200D2D2D2, 0x00A0A0A000A0A0A0, +0x0055555500555555, 0x00A1A1A100A1A1A1, 0x0041414100414141, 0x00FAFAFA00FAFAFA, +0x0043434300434343, 0x0013131300131313, 0x00C4C4C400C4C4C4, 0x002F2F2F002F2F2F, +0x00A8A8A800A8A8A8, 0x00B6B6B600B6B6B6, 0x003C3C3C003C3C3C, 0x002B2B2B002B2B2B, +0x00C1C1C100C1C1C1, 0x00FFFFFF00FFFFFF, 0x00C8C8C800C8C8C8, 0x00A5A5A500A5A5A5, +0x0020202000202020, 0x0089898900898989, 0x0000000000000000, 0x0090909000909090, +0x0047474700474747, 0x00EFEFEF00EFEFEF, 0x00EAEAEA00EAEAEA, 0x00B7B7B700B7B7B7, +0x0015151500151515, 0x0006060600060606, 0x00CDCDCD00CDCDCD, 0x00B5B5B500B5B5B5, +0x0012121200121212, 0x007E7E7E007E7E7E, 0x00BBBBBB00BBBBBB, 0x0029292900292929, +0x000F0F0F000F0F0F, 0x00B8B8B800B8B8B8, 0x0007070700070707, 0x0004040400040404, +0x009B9B9B009B9B9B, 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, +0x00E6E6E600E6E6E6, 0x00CECECE00CECECE, 0x00EDEDED00EDEDED, 0x00E7E7E700E7E7E7, +0x003B3B3B003B3B3B, 0x00FEFEFE00FEFEFE, 0x007F7F7F007F7F7F, 0x00C5C5C500C5C5C5, +0x00A4A4A400A4A4A4, 0x0037373700373737, 0x00B1B1B100B1B1B1, 0x004C4C4C004C4C4C, +0x0091919100919191, 0x006E6E6E006E6E6E, 0x008D8D8D008D8D8D, 0x0076767600767676, +0x0003030300030303, 0x002D2D2D002D2D2D, 0x00DEDEDE00DEDEDE, 0x0096969600969696, +0x0026262600262626, 0x007D7D7D007D7D7D, 0x00C6C6C600C6C6C6, 0x005C5C5C005C5C5C, +0x00D3D3D300D3D3D3, 0x00F2F2F200F2F2F2, 0x004F4F4F004F4F4F, 0x0019191900191919, +0x003F3F3F003F3F3F, 0x00DCDCDC00DCDCDC, 0x0079797900797979, 0x001D1D1D001D1D1D, +0x0052525200525252, 0x00EBEBEB00EBEBEB, 0x00F3F3F300F3F3F3, 0x006D6D6D006D6D6D, +0x005E5E5E005E5E5E, 0x00FBFBFB00FBFBFB, 0x0069696900696969, 
0x00B2B2B200B2B2B2, +0x00F0F0F000F0F0F0, 0x0031313100313131, 0x000C0C0C000C0C0C, 0x00D4D4D400D4D4D4, +0x00CFCFCF00CFCFCF, 0x008C8C8C008C8C8C, 0x00E2E2E200E2E2E2, 0x0075757500757575, +0x00A9A9A900A9A9A9, 0x004A4A4A004A4A4A, 0x0057575700575757, 0x0084848400848484, +0x0011111100111111, 0x0045454500454545, 0x001B1B1B001B1B1B, 0x00F5F5F500F5F5F5, +0x00E4E4E400E4E4E4, 0x000E0E0E000E0E0E, 0x0073737300737373, 0x00AAAAAA00AAAAAA, +0x00F1F1F100F1F1F1, 0x00DDDDDD00DDDDDD, 0x0059595900595959, 0x0014141400141414, +0x006C6C6C006C6C6C, 0x0092929200929292, 0x0054545400545454, 0x00D0D0D000D0D0D0, +0x0078787800787878, 0x0070707000707070, 0x00E3E3E300E3E3E3, 0x0049494900494949, +0x0080808000808080, 0x0050505000505050, 0x00A7A7A700A7A7A7, 0x00F6F6F600F6F6F6, +0x0077777700777777, 0x0093939300939393, 0x0086868600868686, 0x0083838300838383, +0x002A2A2A002A2A2A, 0x00C7C7C700C7C7C7, 0x005B5B5B005B5B5B, 0x00E9E9E900E9E9E9, +0x00EEEEEE00EEEEEE, 0x008F8F8F008F8F8F, 0x0001010100010101, 0x003D3D3D003D3D3D }; + +alignas(64) const uint64_t Camellia_SBOX6[256] = { +0x3800383838003838, 0x4100414141004141, 0x1600161616001616, 0x7600767676007676, +0xD900D9D9D900D9D9, 0x9300939393009393, 0x6000606060006060, 0xF200F2F2F200F2F2, +0x7200727272007272, 0xC200C2C2C200C2C2, 0xAB00ABABAB00ABAB, 0x9A009A9A9A009A9A, +0x7500757575007575, 0x0600060606000606, 0x5700575757005757, 0xA000A0A0A000A0A0, +0x9100919191009191, 0xF700F7F7F700F7F7, 0xB500B5B5B500B5B5, 0xC900C9C9C900C9C9, +0xA200A2A2A200A2A2, 0x8C008C8C8C008C8C, 0xD200D2D2D200D2D2, 0x9000909090009090, +0xF600F6F6F600F6F6, 0x0700070707000707, 0xA700A7A7A700A7A7, 0x2700272727002727, +0x8E008E8E8E008E8E, 0xB200B2B2B200B2B2, 0x4900494949004949, 0xDE00DEDEDE00DEDE, +0x4300434343004343, 0x5C005C5C5C005C5C, 0xD700D7D7D700D7D7, 0xC700C7C7C700C7C7, +0x3E003E3E3E003E3E, 0xF500F5F5F500F5F5, 0x8F008F8F8F008F8F, 0x6700676767006767, +0x1F001F1F1F001F1F, 0x1800181818001818, 0x6E006E6E6E006E6E, 0xAF00AFAFAF00AFAF, +0x2F002F2F2F002F2F, 0xE200E2E2E200E2E2, 0x8500858585008585, 
0x0D000D0D0D000D0D, +0x5300535353005353, 0xF000F0F0F000F0F0, 0x9C009C9C9C009C9C, 0x6500656565006565, +0xEA00EAEAEA00EAEA, 0xA300A3A3A300A3A3, 0xAE00AEAEAE00AEAE, 0x9E009E9E9E009E9E, +0xEC00ECECEC00ECEC, 0x8000808080008080, 0x2D002D2D2D002D2D, 0x6B006B6B6B006B6B, +0xA800A8A8A800A8A8, 0x2B002B2B2B002B2B, 0x3600363636003636, 0xA600A6A6A600A6A6, +0xC500C5C5C500C5C5, 0x8600868686008686, 0x4D004D4D4D004D4D, 0x3300333333003333, +0xFD00FDFDFD00FDFD, 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, +0x3A003A3A3A003A3A, 0x0900090909000909, 0x9500959595009595, 0x1000101010001010, +0x7800787878007878, 0xD800D8D8D800D8D8, 0x4200424242004242, 0xCC00CCCCCC00CCCC, +0xEF00EFEFEF00EFEF, 0x2600262626002626, 0xE500E5E5E500E5E5, 0x6100616161006161, +0x1A001A1A1A001A1A, 0x3F003F3F3F003F3F, 0x3B003B3B3B003B3B, 0x8200828282008282, +0xB600B6B6B600B6B6, 0xDB00DBDBDB00DBDB, 0xD400D4D4D400D4D4, 0x9800989898009898, +0xE800E8E8E800E8E8, 0x8B008B8B8B008B8B, 0x0200020202000202, 0xEB00EBEBEB00EBEB, +0x0A000A0A0A000A0A, 0x2C002C2C2C002C2C, 0x1D001D1D1D001D1D, 0xB000B0B0B000B0B0, +0x6F006F6F6F006F6F, 0x8D008D8D8D008D8D, 0x8800888888008888, 0x0E000E0E0E000E0E, +0x1900191919001919, 0x8700878787008787, 0x4E004E4E4E004E4E, 0x0B000B0B0B000B0B, +0xA900A9A9A900A9A9, 0x0C000C0C0C000C0C, 0x7900797979007979, 0x1100111111001111, +0x7F007F7F7F007F7F, 0x2200222222002222, 0xE700E7E7E700E7E7, 0x5900595959005959, +0xE100E1E1E100E1E1, 0xDA00DADADA00DADA, 0x3D003D3D3D003D3D, 0xC800C8C8C800C8C8, +0x1200121212001212, 0x0400040404000404, 0x7400747474007474, 0x5400545454005454, +0x3000303030003030, 0x7E007E7E7E007E7E, 0xB400B4B4B400B4B4, 0x2800282828002828, +0x5500555555005555, 0x6800686868006868, 0x5000505050005050, 0xBE00BEBEBE00BEBE, +0xD000D0D0D000D0D0, 0xC400C4C4C400C4C4, 0x3100313131003131, 0xCB00CBCBCB00CBCB, +0x2A002A2A2A002A2A, 0xAD00ADADAD00ADAD, 0x0F000F0F0F000F0F, 0xCA00CACACA00CACA, +0x7000707070007070, 0xFF00FFFFFF00FFFF, 0x3200323232003232, 0x6900696969006969, +0x0800080808000808, 
0x6200626262006262, 0x0000000000000000, 0x2400242424002424, +0xD100D1D1D100D1D1, 0xFB00FBFBFB00FBFB, 0xBA00BABABA00BABA, 0xED00EDEDED00EDED, +0x4500454545004545, 0x8100818181008181, 0x7300737373007373, 0x6D006D6D6D006D6D, +0x8400848484008484, 0x9F009F9F9F009F9F, 0xEE00EEEEEE00EEEE, 0x4A004A4A4A004A4A, +0xC300C3C3C300C3C3, 0x2E002E2E2E002E2E, 0xC100C1C1C100C1C1, 0x0100010101000101, +0xE600E6E6E600E6E6, 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, +0xB900B9B9B900B9B9, 0xB300B3B3B300B3B3, 0x7B007B7B7B007B7B, 0xF900F9F9F900F9F9, +0xCE00CECECE00CECE, 0xBF00BFBFBF00BFBF, 0xDF00DFDFDF00DFDF, 0x7100717171007171, +0x2900292929002929, 0xCD00CDCDCD00CDCD, 0x6C006C6C6C006C6C, 0x1300131313001313, +0x6400646464006464, 0x9B009B9B9B009B9B, 0x6300636363006363, 0x9D009D9D9D009D9D, +0xC000C0C0C000C0C0, 0x4B004B4B4B004B4B, 0xB700B7B7B700B7B7, 0xA500A5A5A500A5A5, +0x8900898989008989, 0x5F005F5F5F005F5F, 0xB100B1B1B100B1B1, 0x1700171717001717, +0xF400F4F4F400F4F4, 0xBC00BCBCBC00BCBC, 0xD300D3D3D300D3D3, 0x4600464646004646, +0xCF00CFCFCF00CFCF, 0x3700373737003737, 0x5E005E5E5E005E5E, 0x4700474747004747, +0x9400949494009494, 0xFA00FAFAFA00FAFA, 0xFC00FCFCFC00FCFC, 0x5B005B5B5B005B5B, +0x9700979797009797, 0xFE00FEFEFE00FEFE, 0x5A005A5A5A005A5A, 0xAC00ACACAC00ACAC, +0x3C003C3C3C003C3C, 0x4C004C4C4C004C4C, 0x0300030303000303, 0x3500353535003535, +0xF300F3F3F300F3F3, 0x2300232323002323, 0xB800B8B8B800B8B8, 0x5D005D5D5D005D5D, +0x6A006A6A6A006A6A, 0x9200929292009292, 0xD500D5D5D500D5D5, 0x2100212121002121, +0x4400444444004444, 0x5100515151005151, 0xC600C6C6C600C6C6, 0x7D007D7D7D007D7D, +0x3900393939003939, 0x8300838383008383, 0xDC00DCDCDC00DCDC, 0xAA00AAAAAA00AAAA, +0x7C007C7C7C007C7C, 0x7700777777007777, 0x5600565656005656, 0x0500050505000505, +0x1B001B1B1B001B1B, 0xA400A4A4A400A4A4, 0x1500151515001515, 0x3400343434003434, +0x1E001E1E1E001E1E, 0x1C001C1C1C001C1C, 0xF800F8F8F800F8F8, 0x5200525252005252, +0x2000202020002020, 0x1400141414001414, 0xE900E9E9E900E9E9, 
0xBD00BDBDBD00BDBD, +0xDD00DDDDDD00DDDD, 0xE400E4E4E400E4E4, 0xA100A1A1A100A1A1, 0xE000E0E0E000E0E0, +0x8A008A8A8A008A8A, 0xF100F1F1F100F1F1, 0xD600D6D6D600D6D6, 0x7A007A7A7A007A7A, +0xBB00BBBBBB00BBBB, 0xE300E3E3E300E3E3, 0x4000404040004040, 0x4F004F4F4F004F4F }; + +alignas(64) const uint64_t Camellia_SBOX7[256] = { +0x7070007070700070, 0x2C2C002C2C2C002C, 0xB3B300B3B3B300B3, 0xC0C000C0C0C000C0, +0xE4E400E4E4E400E4, 0x5757005757570057, 0xEAEA00EAEAEA00EA, 0xAEAE00AEAEAE00AE, +0x2323002323230023, 0x6B6B006B6B6B006B, 0x4545004545450045, 0xA5A500A5A5A500A5, +0xEDED00EDEDED00ED, 0x4F4F004F4F4F004F, 0x1D1D001D1D1D001D, 0x9292009292920092, +0x8686008686860086, 0xAFAF00AFAFAF00AF, 0x7C7C007C7C7C007C, 0x1F1F001F1F1F001F, +0x3E3E003E3E3E003E, 0xDCDC00DCDCDC00DC, 0x5E5E005E5E5E005E, 0x0B0B000B0B0B000B, +0xA6A600A6A6A600A6, 0x3939003939390039, 0xD5D500D5D5D500D5, 0x5D5D005D5D5D005D, +0xD9D900D9D9D900D9, 0x5A5A005A5A5A005A, 0x5151005151510051, 0x6C6C006C6C6C006C, +0x8B8B008B8B8B008B, 0x9A9A009A9A9A009A, 0xFBFB00FBFBFB00FB, 0xB0B000B0B0B000B0, +0x7474007474740074, 0x2B2B002B2B2B002B, 0xF0F000F0F0F000F0, 0x8484008484840084, +0xDFDF00DFDFDF00DF, 0xCBCB00CBCBCB00CB, 0x3434003434340034, 0x7676007676760076, +0x6D6D006D6D6D006D, 0xA9A900A9A9A900A9, 0xD1D100D1D1D100D1, 0x0404000404040004, +0x1414001414140014, 0x3A3A003A3A3A003A, 0xDEDE00DEDEDE00DE, 0x1111001111110011, +0x3232003232320032, 0x9C9C009C9C9C009C, 0x5353005353530053, 0xF2F200F2F2F200F2, +0xFEFE00FEFEFE00FE, 0xCFCF00CFCFCF00CF, 0xC3C300C3C3C300C3, 0x7A7A007A7A7A007A, +0x2424002424240024, 0xE8E800E8E8E800E8, 0x6060006060600060, 0x6969006969690069, +0xAAAA00AAAAAA00AA, 0xA0A000A0A0A000A0, 0xA1A100A1A1A100A1, 0x6262006262620062, +0x5454005454540054, 0x1E1E001E1E1E001E, 0xE0E000E0E0E000E0, 0x6464006464640064, +0x1010001010100010, 0x0000000000000000, 0xA3A300A3A3A300A3, 0x7575007575750075, +0x8A8A008A8A8A008A, 0xE6E600E6E6E600E6, 0x0909000909090009, 0xDDDD00DDDDDD00DD, +0x8787008787870087, 0x8383008383830083, 0xCDCD00CDCDCD00CD, 
0x9090009090900090, +0x7373007373730073, 0xF6F600F6F6F600F6, 0x9D9D009D9D9D009D, 0xBFBF00BFBFBF00BF, +0x5252005252520052, 0xD8D800D8D8D800D8, 0xC8C800C8C8C800C8, 0xC6C600C6C6C600C6, +0x8181008181810081, 0x6F6F006F6F6F006F, 0x1313001313130013, 0x6363006363630063, +0xE9E900E9E9E900E9, 0xA7A700A7A7A700A7, 0x9F9F009F9F9F009F, 0xBCBC00BCBCBC00BC, +0x2929002929290029, 0xF9F900F9F9F900F9, 0x2F2F002F2F2F002F, 0xB4B400B4B4B400B4, +0x7878007878780078, 0x0606000606060006, 0xE7E700E7E7E700E7, 0x7171007171710071, +0xD4D400D4D4D400D4, 0xABAB00ABABAB00AB, 0x8888008888880088, 0x8D8D008D8D8D008D, +0x7272007272720072, 0xB9B900B9B9B900B9, 0xF8F800F8F8F800F8, 0xACAC00ACACAC00AC, +0x3636003636360036, 0x2A2A002A2A2A002A, 0x3C3C003C3C3C003C, 0xF1F100F1F1F100F1, +0x4040004040400040, 0xD3D300D3D3D300D3, 0xBBBB00BBBBBB00BB, 0x4343004343430043, +0x1515001515150015, 0xADAD00ADADAD00AD, 0x7777007777770077, 0x8080008080800080, +0x8282008282820082, 0xECEC00ECECEC00EC, 0x2727002727270027, 0xE5E500E5E5E500E5, +0x8585008585850085, 0x3535003535350035, 0x0C0C000C0C0C000C, 0x4141004141410041, +0xEFEF00EFEFEF00EF, 0x9393009393930093, 0x1919001919190019, 0x2121002121210021, +0x0E0E000E0E0E000E, 0x4E4E004E4E4E004E, 0x6565006565650065, 0xBDBD00BDBDBD00BD, +0xB8B800B8B8B800B8, 0x8F8F008F8F8F008F, 0xEBEB00EBEBEB00EB, 0xCECE00CECECE00CE, +0x3030003030300030, 0x5F5F005F5F5F005F, 0xC5C500C5C5C500C5, 0x1A1A001A1A1A001A, +0xE1E100E1E1E100E1, 0xCACA00CACACA00CA, 0x4747004747470047, 0x3D3D003D3D3D003D, +0x0101000101010001, 0xD6D600D6D6D600D6, 0x5656005656560056, 0x4D4D004D4D4D004D, +0x0D0D000D0D0D000D, 0x6666006666660066, 0xCCCC00CCCCCC00CC, 0x2D2D002D2D2D002D, +0x1212001212120012, 0x2020002020200020, 0xB1B100B1B1B100B1, 0x9999009999990099, +0x4C4C004C4C4C004C, 0xC2C200C2C2C200C2, 0x7E7E007E7E7E007E, 0x0505000505050005, +0xB7B700B7B7B700B7, 0x3131003131310031, 0x1717001717170017, 0xD7D700D7D7D700D7, +0x5858005858580058, 0x6161006161610061, 0x1B1B001B1B1B001B, 0x1C1C001C1C1C001C, +0x0F0F000F0F0F000F, 
0x1616001616160016, 0x1818001818180018, 0x2222002222220022, +0x4444004444440044, 0xB2B200B2B2B200B2, 0xB5B500B5B5B500B5, 0x9191009191910091, +0x0808000808080008, 0xA8A800A8A8A800A8, 0xFCFC00FCFCFC00FC, 0x5050005050500050, +0xD0D000D0D0D000D0, 0x7D7D007D7D7D007D, 0x8989008989890089, 0x9797009797970097, +0x5B5B005B5B5B005B, 0x9595009595950095, 0xFFFF00FFFFFF00FF, 0xD2D200D2D2D200D2, +0xC4C400C4C4C400C4, 0x4848004848480048, 0xF7F700F7F7F700F7, 0xDBDB00DBDBDB00DB, +0x0303000303030003, 0xDADA00DADADA00DA, 0x3F3F003F3F3F003F, 0x9494009494940094, +0x5C5C005C5C5C005C, 0x0202000202020002, 0x4A4A004A4A4A004A, 0x3333003333330033, +0x6767006767670067, 0xF3F300F3F3F300F3, 0x7F7F007F7F7F007F, 0xE2E200E2E2E200E2, +0x9B9B009B9B9B009B, 0x2626002626260026, 0x3737003737370037, 0x3B3B003B3B3B003B, +0x9696009696960096, 0x4B4B004B4B4B004B, 0xBEBE00BEBEBE00BE, 0x2E2E002E2E2E002E, +0x7979007979790079, 0x8C8C008C8C8C008C, 0x6E6E006E6E6E006E, 0x8E8E008E8E8E008E, +0xF5F500F5F5F500F5, 0xB6B600B6B6B600B6, 0xFDFD00FDFDFD00FD, 0x5959005959590059, +0x9898009898980098, 0x6A6A006A6A6A006A, 0x4646004646460046, 0xBABA00BABABA00BA, +0x2525002525250025, 0x4242004242420042, 0xA2A200A2A2A200A2, 0xFAFA00FAFAFA00FA, +0x0707000707070007, 0x5555005555550055, 0xEEEE00EEEEEE00EE, 0x0A0A000A0A0A000A, +0x4949004949490049, 0x6868006868680068, 0x3838003838380038, 0xA4A400A4A4A400A4, +0x2828002828280028, 0x7B7B007B7B7B007B, 0xC9C900C9C9C900C9, 0xC1C100C1C1C100C1, +0xE3E300E3E3E300E3, 0xF4F400F4F4F400F4, 0xC7C700C7C7C700C7, 0x9E9E009E9E9E009E }; + +alignas(64) const uint64_t Camellia_SBOX8[256] = { +0x7070700070707000, 0x8282820082828200, 0x2C2C2C002C2C2C00, 0xECECEC00ECECEC00, +0xB3B3B300B3B3B300, 0x2727270027272700, 0xC0C0C000C0C0C000, 0xE5E5E500E5E5E500, +0xE4E4E400E4E4E400, 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, +0xEAEAEA00EAEAEA00, 0x0C0C0C000C0C0C00, 0xAEAEAE00AEAEAE00, 0x4141410041414100, +0x2323230023232300, 0xEFEFEF00EFEFEF00, 0x6B6B6B006B6B6B00, 0x9393930093939300, +0x4545450045454500, 
0x1919190019191900, 0xA5A5A500A5A5A500, 0x2121210021212100, +0xEDEDED00EDEDED00, 0x0E0E0E000E0E0E00, 0x4F4F4F004F4F4F00, 0x4E4E4E004E4E4E00, +0x1D1D1D001D1D1D00, 0x6565650065656500, 0x9292920092929200, 0xBDBDBD00BDBDBD00, +0x8686860086868600, 0xB8B8B800B8B8B800, 0xAFAFAF00AFAFAF00, 0x8F8F8F008F8F8F00, +0x7C7C7C007C7C7C00, 0xEBEBEB00EBEBEB00, 0x1F1F1F001F1F1F00, 0xCECECE00CECECE00, +0x3E3E3E003E3E3E00, 0x3030300030303000, 0xDCDCDC00DCDCDC00, 0x5F5F5F005F5F5F00, +0x5E5E5E005E5E5E00, 0xC5C5C500C5C5C500, 0x0B0B0B000B0B0B00, 0x1A1A1A001A1A1A00, +0xA6A6A600A6A6A600, 0xE1E1E100E1E1E100, 0x3939390039393900, 0xCACACA00CACACA00, +0xD5D5D500D5D5D500, 0x4747470047474700, 0x5D5D5D005D5D5D00, 0x3D3D3D003D3D3D00, +0xD9D9D900D9D9D900, 0x0101010001010100, 0x5A5A5A005A5A5A00, 0xD6D6D600D6D6D600, +0x5151510051515100, 0x5656560056565600, 0x6C6C6C006C6C6C00, 0x4D4D4D004D4D4D00, +0x8B8B8B008B8B8B00, 0x0D0D0D000D0D0D00, 0x9A9A9A009A9A9A00, 0x6666660066666600, +0xFBFBFB00FBFBFB00, 0xCCCCCC00CCCCCC00, 0xB0B0B000B0B0B000, 0x2D2D2D002D2D2D00, +0x7474740074747400, 0x1212120012121200, 0x2B2B2B002B2B2B00, 0x2020200020202000, +0xF0F0F000F0F0F000, 0xB1B1B100B1B1B100, 0x8484840084848400, 0x9999990099999900, +0xDFDFDF00DFDFDF00, 0x4C4C4C004C4C4C00, 0xCBCBCB00CBCBCB00, 0xC2C2C200C2C2C200, +0x3434340034343400, 0x7E7E7E007E7E7E00, 0x7676760076767600, 0x0505050005050500, +0x6D6D6D006D6D6D00, 0xB7B7B700B7B7B700, 0xA9A9A900A9A9A900, 0x3131310031313100, +0xD1D1D100D1D1D100, 0x1717170017171700, 0x0404040004040400, 0xD7D7D700D7D7D700, +0x1414140014141400, 0x5858580058585800, 0x3A3A3A003A3A3A00, 0x6161610061616100, +0xDEDEDE00DEDEDE00, 0x1B1B1B001B1B1B00, 0x1111110011111100, 0x1C1C1C001C1C1C00, +0x3232320032323200, 0x0F0F0F000F0F0F00, 0x9C9C9C009C9C9C00, 0x1616160016161600, +0x5353530053535300, 0x1818180018181800, 0xF2F2F200F2F2F200, 0x2222220022222200, +0xFEFEFE00FEFEFE00, 0x4444440044444400, 0xCFCFCF00CFCFCF00, 0xB2B2B200B2B2B200, +0xC3C3C300C3C3C300, 0xB5B5B500B5B5B500, 0x7A7A7A007A7A7A00, 
0x9191910091919100, +0x2424240024242400, 0x0808080008080800, 0xE8E8E800E8E8E800, 0xA8A8A800A8A8A800, +0x6060600060606000, 0xFCFCFC00FCFCFC00, 0x6969690069696900, 0x5050500050505000, +0xAAAAAA00AAAAAA00, 0xD0D0D000D0D0D000, 0xA0A0A000A0A0A000, 0x7D7D7D007D7D7D00, +0xA1A1A100A1A1A100, 0x8989890089898900, 0x6262620062626200, 0x9797970097979700, +0x5454540054545400, 0x5B5B5B005B5B5B00, 0x1E1E1E001E1E1E00, 0x9595950095959500, +0xE0E0E000E0E0E000, 0xFFFFFF00FFFFFF00, 0x6464640064646400, 0xD2D2D200D2D2D200, +0x1010100010101000, 0xC4C4C400C4C4C400, 0x0000000000000000, 0x4848480048484800, +0xA3A3A300A3A3A300, 0xF7F7F700F7F7F700, 0x7575750075757500, 0xDBDBDB00DBDBDB00, +0x8A8A8A008A8A8A00, 0x0303030003030300, 0xE6E6E600E6E6E600, 0xDADADA00DADADA00, +0x0909090009090900, 0x3F3F3F003F3F3F00, 0xDDDDDD00DDDDDD00, 0x9494940094949400, +0x8787870087878700, 0x5C5C5C005C5C5C00, 0x8383830083838300, 0x0202020002020200, +0xCDCDCD00CDCDCD00, 0x4A4A4A004A4A4A00, 0x9090900090909000, 0x3333330033333300, +0x7373730073737300, 0x6767670067676700, 0xF6F6F600F6F6F600, 0xF3F3F300F3F3F300, +0x9D9D9D009D9D9D00, 0x7F7F7F007F7F7F00, 0xBFBFBF00BFBFBF00, 0xE2E2E200E2E2E200, +0x5252520052525200, 0x9B9B9B009B9B9B00, 0xD8D8D800D8D8D800, 0x2626260026262600, +0xC8C8C800C8C8C800, 0x3737370037373700, 0xC6C6C600C6C6C600, 0x3B3B3B003B3B3B00, +0x8181810081818100, 0x9696960096969600, 0x6F6F6F006F6F6F00, 0x4B4B4B004B4B4B00, +0x1313130013131300, 0xBEBEBE00BEBEBE00, 0x6363630063636300, 0x2E2E2E002E2E2E00, +0xE9E9E900E9E9E900, 0x7979790079797900, 0xA7A7A700A7A7A700, 0x8C8C8C008C8C8C00, +0x9F9F9F009F9F9F00, 0x6E6E6E006E6E6E00, 0xBCBCBC00BCBCBC00, 0x8E8E8E008E8E8E00, +0x2929290029292900, 0xF5F5F500F5F5F500, 0xF9F9F900F9F9F900, 0xB6B6B600B6B6B600, +0x2F2F2F002F2F2F00, 0xFDFDFD00FDFDFD00, 0xB4B4B400B4B4B400, 0x5959590059595900, +0x7878780078787800, 0x9898980098989800, 0x0606060006060600, 0x6A6A6A006A6A6A00, +0xE7E7E700E7E7E700, 0x4646460046464600, 0x7171710071717100, 0xBABABA00BABABA00, +0xD4D4D400D4D4D400, 
0x2525250025252500, 0xABABAB00ABABAB00, 0x4242420042424200, +0x8888880088888800, 0xA2A2A200A2A2A200, 0x8D8D8D008D8D8D00, 0xFAFAFA00FAFAFA00, +0x7272720072727200, 0x0707070007070700, 0xB9B9B900B9B9B900, 0x5555550055555500, +0xF8F8F800F8F8F800, 0xEEEEEE00EEEEEE00, 0xACACAC00ACACAC00, 0x0A0A0A000A0A0A00, +0x3636360036363600, 0x4949490049494900, 0x2A2A2A002A2A2A00, 0x6868680068686800, +0x3C3C3C003C3C3C00, 0x3838380038383800, 0xF1F1F100F1F1F100, 0xA4A4A400A4A4A400, +0x4040400040404000, 0x2828280028282800, 0xD3D3D300D3D3D300, 0x7B7B7B007B7B7B00, +0xBBBBBB00BBBBBB00, 0xC9C9C900C9C9C900, 0x4343430043434300, 0xC1C1C100C1C1C100, +0x1515150015151500, 0xE3E3E300E3E3E300, 0xADADAD00ADADAD00, 0xF4F4F400F4F4F400, +0x7777770077777700, 0xC7C7C700C7C7C700, 0x8080800080808000, 0x9E9E9E009E9E9E00 }; + +namespace Camellia_F { + +/* +* We use the slow byte-wise version of F in the first and last rounds +* to help protect against side channels analyzing cache hits on the +* larger sbox tables. +*/ +uint64_t F_SLOW(uint64_t v, uint64_t K) + { + alignas(64) + static const uint8_t SBOX[256] = { + 0x70, 0x82, 0x2C, 0xEC, 0xB3, 0x27, 0xC0, 0xE5, 0xE4, 0x85, 0x57, + 0x35, 0xEA, 0x0C, 0xAE, 0x41, 0x23, 0xEF, 0x6B, 0x93, 0x45, 0x19, + 0xA5, 0x21, 0xED, 0x0E, 0x4F, 0x4E, 0x1D, 0x65, 0x92, 0xBD, 0x86, + 0xB8, 0xAF, 0x8F, 0x7C, 0xEB, 0x1F, 0xCE, 0x3E, 0x30, 0xDC, 0x5F, + 0x5E, 0xC5, 0x0B, 0x1A, 0xA6, 0xE1, 0x39, 0xCA, 0xD5, 0x47, 0x5D, + 0x3D, 0xD9, 0x01, 0x5A, 0xD6, 0x51, 0x56, 0x6C, 0x4D, 0x8B, 0x0D, + 0x9A, 0x66, 0xFB, 0xCC, 0xB0, 0x2D, 0x74, 0x12, 0x2B, 0x20, 0xF0, + 0xB1, 0x84, 0x99, 0xDF, 0x4C, 0xCB, 0xC2, 0x34, 0x7E, 0x76, 0x05, + 0x6D, 0xB7, 0xA9, 0x31, 0xD1, 0x17, 0x04, 0xD7, 0x14, 0x58, 0x3A, + 0x61, 0xDE, 0x1B, 0x11, 0x1C, 0x32, 0x0F, 0x9C, 0x16, 0x53, 0x18, + 0xF2, 0x22, 0xFE, 0x44, 0xCF, 0xB2, 0xC3, 0xB5, 0x7A, 0x91, 0x24, + 0x08, 0xE8, 0xA8, 0x60, 0xFC, 0x69, 0x50, 0xAA, 0xD0, 0xA0, 0x7D, + 0xA1, 0x89, 0x62, 0x97, 0x54, 0x5B, 0x1E, 0x95, 0xE0, 0xFF, 0x64, + 0xD2, 0x10, 0xC4, 0x00, 
0x48, 0xA3, 0xF7, 0x75, 0xDB, 0x8A, 0x03, + 0xE6, 0xDA, 0x09, 0x3F, 0xDD, 0x94, 0x87, 0x5C, 0x83, 0x02, 0xCD, + 0x4A, 0x90, 0x33, 0x73, 0x67, 0xF6, 0xF3, 0x9D, 0x7F, 0xBF, 0xE2, + 0x52, 0x9B, 0xD8, 0x26, 0xC8, 0x37, 0xC6, 0x3B, 0x81, 0x96, 0x6F, + 0x4B, 0x13, 0xBE, 0x63, 0x2E, 0xE9, 0x79, 0xA7, 0x8C, 0x9F, 0x6E, + 0xBC, 0x8E, 0x29, 0xF5, 0xF9, 0xB6, 0x2F, 0xFD, 0xB4, 0x59, 0x78, + 0x98, 0x06, 0x6A, 0xE7, 0x46, 0x71, 0xBA, 0xD4, 0x25, 0xAB, 0x42, + 0x88, 0xA2, 0x8D, 0xFA, 0x72, 0x07, 0xB9, 0x55, 0xF8, 0xEE, 0xAC, + 0x0A, 0x36, 0x49, 0x2A, 0x68, 0x3C, 0x38, 0xF1, 0xA4, 0x40, 0x28, + 0xD3, 0x7B, 0xBB, 0xC9, 0x43, 0xC1, 0x15, 0xE3, 0xAD, 0xF4, 0x77, + 0xC7, 0x80, 0x9E }; + + /* Key mixing: the round key is XORed into the input before any substitution. */ const uint64_t x = v ^ K; + + /* Eight byte substitutions, all through the single SBOX table: t1 and t8 use it directly, t2/t5 rotate the looked-up value left by 1, t3/t6 rotate it left by 7, and t4/t7 rotate the index byte left by 1 before the lookup. */ const uint8_t t1 = SBOX[get_byte(0, x)]; + const uint8_t t2 = rotl<1>(SBOX[get_byte(1, x)]); + const uint8_t t3 = rotl<7>(SBOX[get_byte(2, x)]); + const uint8_t t4 = SBOX[rotl<1>(get_byte(3, x))]; + const uint8_t t5 = rotl<1>(SBOX[get_byte(4, x)]); + const uint8_t t6 = rotl<7>(SBOX[get_byte(5, x)]); + const uint8_t t7 = SBOX[rotl<1>(get_byte(6, x))]; + const uint8_t t8 = SBOX[get_byte(7, x)]; + + /* Linear mixing: every output byte is the XOR of a fixed subset of t1..t8. */ const uint8_t y1 = t1 ^ t3 ^ t4 ^ t6 ^ t7 ^ t8; + const uint8_t y2 = t1 ^ t2 ^ t4 ^ t5 ^ t7 ^ t8; + const uint8_t y3 = t1 ^ t2 ^ t3 ^ t5 ^ t6 ^ t8; + const uint8_t y4 = t2 ^ t3 ^ t4 ^ t5 ^ t6 ^ t7; + const uint8_t y5 = t1 ^ t2 ^ t6 ^ t7 ^ t8; + const uint8_t y6 = t2 ^ t3 ^ t5 ^ t7 ^ t8; + const uint8_t y7 = t3 ^ t4 ^ t5 ^ t6 ^ t8; + const uint8_t y8 = t1 ^ t4 ^ t5 ^ t6 ^ t7; + + /* The eight mixed bytes are handed to make_uint64 to form the 64-bit result. */ return make_uint64(y1, y2, y3, y4, y5, y6, y7, y8); + } + +/* Table-driven round function: same key XOR as F_SLOW, then one 64-bit entry from each of Camellia_SBOX1..Camellia_SBOX8, indexed by get_byte(0..7, x), XORed together. */ inline uint64_t F(uint64_t v, uint64_t K) + { + const uint64_t x = v ^ K; + + return Camellia_SBOX1[get_byte(0, x)] ^ + Camellia_SBOX2[get_byte(1, x)] ^ + Camellia_SBOX3[get_byte(2, x)] ^ + Camellia_SBOX4[get_byte(3, x)] ^ + Camellia_SBOX5[get_byte(4, x)] ^ + Camellia_SBOX6[get_byte(5, x)] ^ + Camellia_SBOX7[get_byte(6, x)] ^ + Camellia_SBOX8[get_byte(7, x)]; + } + +/* FL: v and K are split into 32-bit halves; the low half of v is updated first (x1 & k1, rotated left by 1, XORed in), then the high half (x2 | k2, XORed in), and the halves are rejoined. */ inline uint64_t FL(uint64_t v, uint64_t K) + { + uint32_t x1 = 
static_cast<uint32_t>(v >> 32); + uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF); + + const uint32_t k1 = static_cast<uint32_t>(K >> 32); + const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF); + + x2 ^= rotl<1>(x1 & k1); + x1 ^= (x2 | k2); + + return ((static_cast<uint64_t>(x1) << 32) | x2); + } + +/* FLINV: same 32-bit split as FL, with the two update steps applied in the opposite order (high half first, then low half). */ inline uint64_t FLINV(uint64_t v, uint64_t K) + { + uint32_t x1 = static_cast<uint32_t>(v >> 32); + uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF); + + const uint32_t k1 = static_cast<uint32_t>(K >> 32); + const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF); + + x1 ^= (x2 | k2); + x2 ^= rotl<1>(x1 & k1); + + return ((static_cast<uint64_t>(x1) << 32) | x2); + } + +/* +* Camellia Encryption +* +* in and out hold `blocks` consecutive 16-byte blocks; SK is read front to back. +* `rounds` counts pairs of round-function calls: the first and the last pair +* use F_SLOW, the pairs inside the loop use F, and inside the loop an +* FL/FLINV step comes first whenever r % 3 == 0. +*/ +void encrypt(const uint8_t in[], uint8_t out[], size_t blocks, + const secure_vector<uint64_t>& SK, const size_t rounds) + { + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) + { + uint64_t D1, D2; + load_be(in + 16*i, D1, D2); + + const uint64_t* K = SK.data(); + + /* the first two subkeys are XORed straight into the block */ D1 ^= *K++; + D2 ^= *K++; + + /* first pair: byte-wise F_SLOW */ D2 ^= F_SLOW(D1, *K++); + D1 ^= F_SLOW(D2, *K++); + + for(size_t r = 1; r != rounds - 1; ++r) + { + if(r % 3 == 0) + { + D1 = FL (D1, *K++); + D2 = FLINV(D2, *K++); + } + + D2 ^= F(D1, *K++); + D1 ^= F(D2, *K++); + } + + /* last pair: F_SLOW again, then the final two subkeys */ D2 ^= F_SLOW(D1, *K++); + D1 ^= F_SLOW(D2, *K++); + + D2 ^= *K++; + D1 ^= *K++; + + /* the halves are written out swapped, D2 first */ store_be(out + 16*i, D2, D1); + } + } + +/* +* Camellia Decryption +* +* Same sequence of steps as encrypt(), but the key pointer starts at the +* last element of SK and walks backwards. +*/ +void decrypt(const uint8_t in[], uint8_t out[], size_t blocks, + const secure_vector<uint64_t>& SK, const size_t rounds) + { + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) + { + uint64_t D1, D2; + load_be(in + 16*i, D1, D2); + + const uint64_t* K = &SK[SK.size()-1]; + + D2 ^= *K--; + D1 ^= *K--; + + D2 ^= F_SLOW(D1, *K--); + D1 ^= F_SLOW(D2, *K--); + + for(size_t r = 1; r != rounds - 1; ++r) + { + if(r % 3 == 0) + { + D1 = FL (D1, *K--); + D2 = FLINV(D2, *K--); + } + + D2 ^= F(D1, *K--); + D1 ^= F(D2, *K--); + } + + D2 ^= F_SLOW(D1, *K--); + D1 ^= F_SLOW(D2, *K--); + + D1 ^= *K--; + /* last key read: no decrement, K now points at SK[0] */ D2 ^= *K; 
+ + store_be(out + 16*i, D2, D1); + } + } + +/* Upper 64 bits of the 128-bit value h:l rotated left by `shift`. Because of the (64-shift) sub-shift, shift has to stay within 1..63; every call in key_schedule passes a constant in that range. */ uint64_t left_rot_hi(uint64_t h, uint64_t l, size_t shift) + { + return (h << shift) | (l >> (64-shift)); + } + +/* Lower 64 bits of the same 128-bit left rotation; same 1..63 restriction on shift. */ uint64_t left_rot_lo(uint64_t h, uint64_t l, size_t shift) + { + return (h >> (64-shift)) | (l << shift); + } + +/* +* Camellia Key Schedule +* +* KL is loaded from key words 0 and 1. KR_H is key word 2 when length >= 24, +* otherwise 0; KR_L is key word 3 when length == 32, ~KR_H when length == 24, +* otherwise 0. KA is derived from KL ^ KR with four F calls (Sigma1..Sigma4), +* KB from KA ^ KR with two more (Sigma5, Sigma6). SK is then filled with +* 26 entries (length == 16) or 34 entries (any other length) taken from +* fixed rotations of KL, KR, KA and KB. +*/ +void key_schedule(secure_vector<uint64_t>& SK, const uint8_t key[], size_t length) + { + const uint64_t Sigma1 = 0xA09E667F3BCC908B; + const uint64_t Sigma2 = 0xB67AE8584CAA73B2; + const uint64_t Sigma3 = 0xC6EF372FE94F82BE; + const uint64_t Sigma4 = 0x54FF53A5F1D36F1C; + const uint64_t Sigma5 = 0x10E527FADE682D1D; + const uint64_t Sigma6 = 0xB05688C2B3E6C1FD; + + const uint64_t KL_H = load_be<uint64_t>(key, 0); + const uint64_t KL_L = load_be<uint64_t>(key, 1); + + const uint64_t KR_H = (length >= 24) ? load_be<uint64_t>(key, 2) : 0; + const uint64_t KR_L = + (length == 32) ? load_be<uint64_t>(key, 3) : ((length == 24) ? ~KR_H : 0); + + uint64_t D1 = KL_H ^ KR_H; + uint64_t D2 = KL_L ^ KR_L; + D2 ^= F(D1, Sigma1); + D1 ^= F(D2, Sigma2); + D1 ^= KL_H; + D2 ^= KL_L; + D2 ^= F(D1, Sigma3); + D1 ^= F(D2, Sigma4); + + const uint64_t KA_H = D1; + const uint64_t KA_L = D2; + + D1 = KA_H ^ KR_H; + D2 = KA_L ^ KR_L; + D2 ^= F(D1, Sigma5); + D1 ^= F(D2, Sigma6); + + /* KB is computed unconditionally but only read in the 34-entry branch. */ const uint64_t KB_H = D1; + const uint64_t KB_L = D2; + + if(length == 16) + { + SK.resize(26); + + SK[ 0] = KL_H; + SK[ 1] = KL_L; + SK[ 2] = KA_H; + SK[ 3] = KA_L; + SK[ 4] = left_rot_hi(KL_H, KL_L, 15); + SK[ 5] = left_rot_lo(KL_H, KL_L, 15); + SK[ 6] = left_rot_hi(KA_H, KA_L, 15); + SK[ 7] = left_rot_lo(KA_H, KA_L, 15); + SK[ 8] = left_rot_hi(KA_H, KA_L, 30); + SK[ 9] = left_rot_lo(KA_H, KA_L, 30); + SK[10] = left_rot_hi(KL_H, KL_L, 45); + SK[11] = left_rot_lo(KL_H, KL_L, 45); + /* only the upper half of KA rotated by 45 and the lower half of KL rotated by 60 are used */ SK[12] = left_rot_hi(KA_H, KA_L, 45); + SK[13] = left_rot_lo(KL_H, KL_L, 60); + SK[14] = left_rot_hi(KA_H, KA_L, 60); + SK[15] = left_rot_lo(KA_H, KA_L, 60); + /* rotations past 64 bits are written as (n-64) with the lo/hi helpers swapped */ SK[16] = left_rot_lo(KL_H, KL_L, 77-64); + SK[17] = left_rot_hi(KL_H, KL_L, 
77-64); + SK[18] = left_rot_lo(KL_H, KL_L, 94-64); + SK[19] = left_rot_hi(KL_H, KL_L, 94-64); + SK[20] = left_rot_lo(KA_H, KA_L, 94-64); + SK[21] = left_rot_hi(KA_H, KA_L, 94-64); + SK[22] = left_rot_lo(KL_H, KL_L, 111-64); + SK[23] = left_rot_hi(KL_H, KL_L, 111-64); + SK[24] = left_rot_lo(KA_H, KA_L, 111-64); + SK[25] = left_rot_hi(KA_H, KA_L, 111-64); + } + else + { + /* every length other than 16 lands here: 34 subkeys drawn from KL, KR, KA and KB */ SK.resize(34); + + SK[ 0] = KL_H; + SK[ 1] = KL_L; + SK[ 2] = KB_H; + SK[ 3] = KB_L; + + SK[ 4] = left_rot_hi(KR_H, KR_L, 15); + SK[ 5] = left_rot_lo(KR_H, KR_L, 15); + SK[ 6] = left_rot_hi(KA_H, KA_L, 15); + SK[ 7] = left_rot_lo(KA_H, KA_L, 15); + + SK[ 8] = left_rot_hi(KR_H, KR_L, 30); + SK[ 9] = left_rot_lo(KR_H, KR_L, 30); + SK[10] = left_rot_hi(KB_H, KB_L, 30); + SK[11] = left_rot_lo(KB_H, KB_L, 30); + + SK[12] = left_rot_hi(KL_H, KL_L, 45); + SK[13] = left_rot_lo(KL_H, KL_L, 45); + SK[14] = left_rot_hi(KA_H, KA_L, 45); + SK[15] = left_rot_lo(KA_H, KA_L, 45); + + SK[16] = left_rot_hi(KL_H, KL_L, 60); + SK[17] = left_rot_lo(KL_H, KL_L, 60); + SK[18] = left_rot_hi(KR_H, KR_L, 60); + SK[19] = left_rot_lo(KR_H, KR_L, 60); + SK[20] = left_rot_hi(KB_H, KB_L, 60); + SK[21] = left_rot_lo(KB_H, KB_L, 60); + + /* from here on the rotation exceeds 64 bits: (n-64) with the lo/hi helpers swapped */ SK[22] = left_rot_lo(KL_H, KL_L, 77-64); + SK[23] = left_rot_hi(KL_H, KL_L, 77-64); + SK[24] = left_rot_lo(KA_H, KA_L, 77-64); + SK[25] = left_rot_hi(KA_H, KA_L, 77-64); + + SK[26] = left_rot_lo(KR_H, KR_L, 94-64); + SK[27] = left_rot_hi(KR_H, KR_L, 94-64); + SK[28] = left_rot_lo(KA_H, KA_L, 94-64); + SK[29] = left_rot_hi(KA_H, KA_L, 94-64); + SK[30] = left_rot_lo(KL_H, KL_L, 111-64); + SK[31] = left_rot_hi(KL_H, KL_L, 111-64); + SK[32] = left_rot_lo(KB_H, KB_L, 111-64); + SK[33] = left_rot_hi(KB_H, KB_L, 111-64); + } + } + +} + +} + +/* Public entry points: each passes "m_SK is non-empty" to verify_key_set and then delegates to Camellia_F with its round-pair count (9 for Camellia_128, 12 for Camellia_192 and Camellia_256). */ void Camellia_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SK.empty() == false); + Camellia_F::encrypt(in, out, blocks, m_SK, 9); + } + +void Camellia_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) 
const + { + verify_key_set(m_SK.empty() == false); + Camellia_F::encrypt(in, out, blocks, m_SK, 12); + } + +void Camellia_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SK.empty() == false); + Camellia_F::encrypt(in, out, blocks, m_SK, 12); + } + +/* The decrypt_n wrappers mirror the encrypt_n ones: same m_SK check, same round-pair counts, delegating to Camellia_F::decrypt. */ void Camellia_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SK.empty() == false); + Camellia_F::decrypt(in, out, blocks, m_SK, 9); + } + +void Camellia_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SK.empty() == false); + Camellia_F::decrypt(in, out, blocks, m_SK, 12); + } + +void Camellia_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SK.empty() == false); + Camellia_F::decrypt(in, out, blocks, m_SK, 12); + } + +/* All three variants forward to the one shared schedule routine, which branches on the key length it is given. */ void Camellia_128::key_schedule(const uint8_t key[], size_t length) + { + Camellia_F::key_schedule(m_SK, key, length); + } + +void Camellia_192::key_schedule(const uint8_t key[], size_t length) + { + Camellia_F::key_schedule(m_SK, key, length); + } + +void Camellia_256::key_schedule(const uint8_t key[], size_t length) + { + Camellia_F::key_schedule(m_SK, key, length); + } + +/* clear() hands the subkey vector to zap(). NOTE(review): zap presumably wipes and empties the vector, which would make the m_SK.empty() check in encrypt_n/decrypt_n fail again - confirm against zap's definition. */ void Camellia_128::clear() + { + zap(m_SK); + } + +void Camellia_192::clear() + { + zap(m_SK); + } + +void Camellia_256::clear() + { + zap(m_SK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/camellia/camellia.h b/comm/third_party/botan/src/lib/block/camellia/camellia.h new file mode 100644 index 0000000000..4995eb0c93 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/camellia/camellia.h @@ -0,0 +1,73 @@ +/* +* Camellia +* (C) 2012 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_CAMELLIA_H_ +#define BOTAN_CAMELLIA_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(camellia.h) + +namespace Botan { + +/** +* Camellia-128 +*/ +class BOTAN_PUBLIC_API(2,0) 
Camellia_128 final : public Block_Cipher_Fixed_Params<16, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "Camellia-128"; } + /* clone() allocates a fresh default-constructed object; the subkeys in m_SK are not copied. */ BlockCipher* clone() const override { return new Camellia_128; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + /* subkeys written by key_schedule(); encrypt_n/decrypt_n require this to be non-empty */ secure_vector<uint64_t> m_SK; + }; + +/** +* Camellia-192 +* +* NOTE(review): the template arguments <16, 24> presumably give block size +* and key length in bytes - confirm against Block_Cipher_Fixed_Params. +*/ +class BOTAN_PUBLIC_API(2,0) Camellia_192 final : public Block_Cipher_Fixed_Params<16, 24> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "Camellia-192"; } + /* as above: a new object without the current subkeys */ BlockCipher* clone() const override { return new Camellia_192; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + /* subkeys written by key_schedule() */ secure_vector<uint64_t> m_SK; + }; + +/** +* Camellia-256 +* +* Same interface as the two classes above; only the second template +* argument (32) and the reported name differ. +*/ +class BOTAN_PUBLIC_API(2,0) Camellia_256 final : public Block_Cipher_Fixed_Params<16, 32> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "Camellia-256"; } + BlockCipher* clone() const override { return new Camellia_256; } + private: + void key_schedule(const uint8_t key[], size_t length) override; + + /* subkeys written by key_schedule() */ secure_vector<uint64_t> m_SK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/camellia/info.txt b/comm/third_party/botan/src/lib/block/camellia/info.txt new file mode 100644 index 0000000000..c70a7f3451 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/camellia/info.txt @@ -0,0 +1,7 @@ +<defines> +CAMELLIA -> 20150922 +</defines> + 
+<header:public> +camellia.h +</header:public> diff --git a/comm/third_party/botan/src/lib/block/cascade/cascade.cpp b/comm/third_party/botan/src/lib/block/cascade/cascade.cpp new file mode 100644 index 0000000000..6607fd5b27 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cascade/cascade.cpp @@ -0,0 +1,93 @@ +/* +* Block Cipher Cascade +* (C) 2010 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/cascade.h> + +namespace Botan { + +/* One cascade block is block_size() bytes and may span several blocks of either sub-cipher, so the block count is rescaled for each one. Cipher 1 encrypts in to out; cipher 2 then encrypts out in place. */ void Cascade_Cipher::encrypt_n(const uint8_t in[], uint8_t out[], + size_t blocks) const + { + size_t c1_blocks = blocks * (block_size() / m_cipher1->block_size()); + size_t c2_blocks = blocks * (block_size() / m_cipher2->block_size()); + + m_cipher1->encrypt_n(in, out, c1_blocks); + m_cipher2->encrypt_n(out, out, c2_blocks); + } + +/* Reverse order of encrypt_n: cipher 2 decrypts in to out, then cipher 1 decrypts out in place. */ void Cascade_Cipher::decrypt_n(const uint8_t in[], uint8_t out[], + size_t blocks) const + { + size_t c1_blocks = blocks * (block_size() / m_cipher1->block_size()); + size_t c2_blocks = blocks * (block_size() / m_cipher2->block_size()); + + m_cipher2->decrypt_n(in, out, c2_blocks); + m_cipher1->decrypt_n(out, out, c1_blocks); + } + +/* Splits the key by position: cipher 1 is keyed with its maximum_keylength() bytes from the front, cipher 2 with its maximum_keylength() bytes immediately after. The unnamed length parameter is not consulted. */ void Cascade_Cipher::key_schedule(const uint8_t key[], size_t) + { + const uint8_t* key2 = key + m_cipher1->maximum_keylength(); + + m_cipher1->set_key(key , m_cipher1->maximum_keylength()); + m_cipher2->set_key(key2, m_cipher2->maximum_keylength()); + } + +void Cascade_Cipher::clear() + { + m_cipher1->clear(); + m_cipher2->clear(); + } + +std::string Cascade_Cipher::name() const + { + return "Cascade(" + m_cipher1->name() + "," + m_cipher2->name() + ")"; + } + +/* Builds a new cascade from clones of both sub-ciphers. */ BlockCipher* Cascade_Cipher::clone() const + { + return new Cascade_Cipher(m_cipher1->clone(), + m_cipher2->clone()); + } + +namespace { + +/* Iterative remainder loop returning the greatest common divisor of a and b; a is returned unchanged when b is 0. */ size_t euclids_algorithm(size_t a, size_t b) + { + while(b != 0) + { + size_t t = b; + b = a % b; + a = t; + } + + return a; + } + +/* Combined block size for the cascade: the common value when both sizes match, otherwise the product of the two sizes divided by their gcd. */ size_t block_size_for_cascade(size_t bs, size_t bs2) + { + if(bs == bs2) + return bs; 
+ + const size_t gcd = euclids_algorithm(bs, bs2); + + /* product / gcd, i.e. the least common multiple of the two block sizes */ return (bs * bs2) / gcd; + } + +} + +/* Stores both raw pointers in the unique_ptr members, computes the combined block size, and asserts that it is a whole multiple of each sub-cipher's block size. */ Cascade_Cipher::Cascade_Cipher(BlockCipher* c1, BlockCipher* c2) : + m_cipher1(c1), m_cipher2(c2) + { + m_block = block_size_for_cascade(c1->block_size(), c2->block_size()); + + BOTAN_ASSERT(m_block % c1->block_size() == 0 && + m_block % c2->block_size() == 0, + "Combined block size is a multiple of each ciphers block"); + } + +} diff --git a/comm/third_party/botan/src/lib/block/cascade/cascade.h b/comm/third_party/botan/src/lib/block/cascade/cascade.h new file mode 100644 index 0000000000..26f5133811 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cascade/cascade.h @@ -0,0 +1,57 @@ +/* +* Block Cipher Cascade +* (C) 2010 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_CASCADE_H_ +#define BOTAN_CASCADE_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(cascade.h) + +namespace Botan { + +/** +* Block Cipher Cascade +* +* Chains two block ciphers: encryption applies the first cipher and then the +* second, decryption applies them in the reverse order. +*/ +class BOTAN_PUBLIC_API(2,0) Cascade_Cipher final : public BlockCipher + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + /* combined block size computed once in the constructor */ size_t block_size() const override { return m_block; } + + /* the cascade key length is the sum of the two ciphers' maximum key lengths */ Key_Length_Specification key_spec() const override + { + return Key_Length_Specification(m_cipher1->maximum_keylength() + + m_cipher2->maximum_keylength()); + } + + void clear() override; + std::string name() const override; + BlockCipher* clone() const override; + + /** + * Create a cascade of two block ciphers + * + * Both pointers are stored in unique_ptr members, so the cascade + * takes ownership of the two cipher objects. + * @param cipher1 the first cipher + * @param cipher2 the second cipher + */ + Cascade_Cipher(BlockCipher* cipher1, BlockCipher* cipher2); + + Cascade_Cipher(const Cascade_Cipher&) = delete; + Cascade_Cipher& operator=(const Cascade_Cipher&) = delete; + private: + void key_schedule(const uint8_t[], size_t) override; + + size_t m_block; + 
std::unique_ptr<BlockCipher> m_cipher1, m_cipher2; /* owned sub-ciphers; m_cipher1 runs first when encrypting */ + }; + + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/cascade/info.txt b/comm/third_party/botan/src/lib/block/cascade/info.txt new file mode 100644 index 0000000000..15f5b22627 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cascade/info.txt @@ -0,0 +1,7 @@ +<defines> +CASCADE -> 20131128 +</defines> + +<header:public> +cascade.h +</header:public> diff --git a/comm/third_party/botan/src/lib/block/cast128/cast128.cpp b/comm/third_party/botan/src/lib/block/cast128/cast128.cpp new file mode 100644 index 0000000000..bcb273be7a --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast128/cast128.cpp @@ -0,0 +1,471 @@ +/* +* CAST-128 +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/cast128.h> +#include <botan/internal/cast_sboxes.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +namespace { + +/* +* CAST-128 Round Type 1 +* +* T is (MK + R) rotated left by RK; the result combines the four S-box +* lookups on the bytes of T as (S1 ^ S2) - S3 + S4. +*/ +inline uint32_t F1(uint32_t R, uint32_t MK, uint8_t RK) + { + const uint32_t T = rotl_var(MK + R, RK); + return (CAST_SBOX1[get_byte(0, T)] ^ CAST_SBOX2[get_byte(1, T)]) - + CAST_SBOX3[get_byte(2, T)] + CAST_SBOX4[get_byte(3, T)]; + } + +/* +* CAST-128 Round Type 2 +* +* T is (MK ^ R) rotated left by RK; the result is (S1 - S2 + S3) ^ S4. +*/ +inline uint32_t F2(uint32_t R, uint32_t MK, uint8_t RK) + { + const uint32_t T = rotl_var(MK ^ R, RK); + return (CAST_SBOX1[get_byte(0, T)] - CAST_SBOX2[get_byte(1, T)] + + CAST_SBOX3[get_byte(2, T)]) ^ CAST_SBOX4[get_byte(3, T)]; + } + +/* +* CAST-128 Round Type 3 +* +* T is (MK - R) rotated left by RK; the result is ((S1 + S2) ^ S3) - S4. +*/ +inline uint32_t F3(uint32_t R, uint32_t MK, uint8_t RK) + { + const uint32_t T = rotl_var(MK - R, RK); + return ((CAST_SBOX1[get_byte(0, T)] + CAST_SBOX2[get_byte(1, T)]) ^ + CAST_SBOX3[get_byte(2, T)]) - CAST_SBOX4[get_byte(3, T)]; + } + +} + +/* +* CAST-128 Encryption +* +* Sixteen rounds cycling F1, F2, F3 with key indices 0..15. Two blocks are +* handled per loop iteration with their rounds interleaved; a single +* trailing block goes through the same rounds on its own. +*/ +void CAST_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + + while(blocks >= 2) + { + 
uint32_t L0, R0, L1, R1; + load_be(in, L0, R0, L1, R1); + + /* (L0,R0) and (L1,R1) are two independent blocks; each round is issued for both before moving to the next key index */ L0 ^= F1(R0, m_MK[ 0], m_RK[ 0]); + L1 ^= F1(R1, m_MK[ 0], m_RK[ 0]); + R0 ^= F2(L0, m_MK[ 1], m_RK[ 1]); + R1 ^= F2(L1, m_MK[ 1], m_RK[ 1]); + L0 ^= F3(R0, m_MK[ 2], m_RK[ 2]); + L1 ^= F3(R1, m_MK[ 2], m_RK[ 2]); + R0 ^= F1(L0, m_MK[ 3], m_RK[ 3]); + R1 ^= F1(L1, m_MK[ 3], m_RK[ 3]); + L0 ^= F2(R0, m_MK[ 4], m_RK[ 4]); + L1 ^= F2(R1, m_MK[ 4], m_RK[ 4]); + R0 ^= F3(L0, m_MK[ 5], m_RK[ 5]); + R1 ^= F3(L1, m_MK[ 5], m_RK[ 5]); + L0 ^= F1(R0, m_MK[ 6], m_RK[ 6]); + L1 ^= F1(R1, m_MK[ 6], m_RK[ 6]); + R0 ^= F2(L0, m_MK[ 7], m_RK[ 7]); + R1 ^= F2(L1, m_MK[ 7], m_RK[ 7]); + L0 ^= F3(R0, m_MK[ 8], m_RK[ 8]); + L1 ^= F3(R1, m_MK[ 8], m_RK[ 8]); + R0 ^= F1(L0, m_MK[ 9], m_RK[ 9]); + R1 ^= F1(L1, m_MK[ 9], m_RK[ 9]); + L0 ^= F2(R0, m_MK[10], m_RK[10]); + L1 ^= F2(R1, m_MK[10], m_RK[10]); + R0 ^= F3(L0, m_MK[11], m_RK[11]); + R1 ^= F3(L1, m_MK[11], m_RK[11]); + L0 ^= F1(R0, m_MK[12], m_RK[12]); + L1 ^= F1(R1, m_MK[12], m_RK[12]); + R0 ^= F2(L0, m_MK[13], m_RK[13]); + R1 ^= F2(L1, m_MK[13], m_RK[13]); + L0 ^= F3(R0, m_MK[14], m_RK[14]); + L1 ^= F3(R1, m_MK[14], m_RK[14]); + R0 ^= F1(L0, m_MK[15], m_RK[15]); + R1 ^= F1(L1, m_MK[15], m_RK[15]); + + /* halves are written back swapped: R before L for each block */ store_be(out, R0, L0, R1, L1); + + blocks -= 2; + out += 2 * BLOCK_SIZE; + in += 2 * BLOCK_SIZE; + } + + /* the loop above leaves blocks at 0 or 1 */ if(blocks) + { + uint32_t L, R; + load_be(in, L, R); + + L ^= F1(R, m_MK[ 0], m_RK[ 0]); + R ^= F2(L, m_MK[ 1], m_RK[ 1]); + L ^= F3(R, m_MK[ 2], m_RK[ 2]); + R ^= F1(L, m_MK[ 3], m_RK[ 3]); + L ^= F2(R, m_MK[ 4], m_RK[ 4]); + R ^= F3(L, m_MK[ 5], m_RK[ 5]); + L ^= F1(R, m_MK[ 6], m_RK[ 6]); + R ^= F2(L, m_MK[ 7], m_RK[ 7]); + L ^= F3(R, m_MK[ 8], m_RK[ 8]); + R ^= F1(L, m_MK[ 9], m_RK[ 9]); + L ^= F2(R, m_MK[10], m_RK[10]); + R ^= F3(L, m_MK[11], m_RK[11]); + L ^= F1(R, m_MK[12], m_RK[12]); + R ^= F2(L, m_MK[13], m_RK[13]); + L ^= F3(R, m_MK[14], m_RK[14]); + R ^= F1(L, m_MK[15], m_RK[15]); + + store_be(out, R, L); + } + } + +/* +* CAST-128 Decryption +* +* Applies the encryption rounds with the key indices reversed (15 down to 0), +* again two blocks at a time plus an optional single trailing block. +*/ +void 
CAST_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + + /* two blocks per iteration, rounds interleaved, key indices running from 15 down to 0 */ while(blocks >= 2) + { + uint32_t L0, R0, L1, R1; + load_be(in, L0, R0, L1, R1); + + L0 ^= F1(R0, m_MK[15], m_RK[15]); + L1 ^= F1(R1, m_MK[15], m_RK[15]); + R0 ^= F3(L0, m_MK[14], m_RK[14]); + R1 ^= F3(L1, m_MK[14], m_RK[14]); + L0 ^= F2(R0, m_MK[13], m_RK[13]); + L1 ^= F2(R1, m_MK[13], m_RK[13]); + R0 ^= F1(L0, m_MK[12], m_RK[12]); + R1 ^= F1(L1, m_MK[12], m_RK[12]); + L0 ^= F3(R0, m_MK[11], m_RK[11]); + L1 ^= F3(R1, m_MK[11], m_RK[11]); + R0 ^= F2(L0, m_MK[10], m_RK[10]); + R1 ^= F2(L1, m_MK[10], m_RK[10]); + L0 ^= F1(R0, m_MK[ 9], m_RK[ 9]); + L1 ^= F1(R1, m_MK[ 9], m_RK[ 9]); + R0 ^= F3(L0, m_MK[ 8], m_RK[ 8]); + R1 ^= F3(L1, m_MK[ 8], m_RK[ 8]); + L0 ^= F2(R0, m_MK[ 7], m_RK[ 7]); + L1 ^= F2(R1, m_MK[ 7], m_RK[ 7]); + R0 ^= F1(L0, m_MK[ 6], m_RK[ 6]); + R1 ^= F1(L1, m_MK[ 6], m_RK[ 6]); + L0 ^= F3(R0, m_MK[ 5], m_RK[ 5]); + L1 ^= F3(R1, m_MK[ 5], m_RK[ 5]); + R0 ^= F2(L0, m_MK[ 4], m_RK[ 4]); + R1 ^= F2(L1, m_MK[ 4], m_RK[ 4]); + L0 ^= F1(R0, m_MK[ 3], m_RK[ 3]); + L1 ^= F1(R1, m_MK[ 3], m_RK[ 3]); + R0 ^= F3(L0, m_MK[ 2], m_RK[ 2]); + R1 ^= F3(L1, m_MK[ 2], m_RK[ 2]); + L0 ^= F2(R0, m_MK[ 1], m_RK[ 1]); + L1 ^= F2(R1, m_MK[ 1], m_RK[ 1]); + R0 ^= F1(L0, m_MK[ 0], m_RK[ 0]); + R1 ^= F1(L1, m_MK[ 0], m_RK[ 0]); + + /* halves are written back swapped: R before L for each block */ store_be(out, R0, L0, R1, L1); + + blocks -= 2; + out += 2 * BLOCK_SIZE; + in += 2 * BLOCK_SIZE; + } + + /* the loop above leaves blocks at 0 or 1 */ if(blocks) + { + uint32_t L, R; + load_be(in, L, R); + + L ^= F1(R, m_MK[15], m_RK[15]); + R ^= F3(L, m_MK[14], m_RK[14]); + L ^= F2(R, m_MK[13], m_RK[13]); + R ^= F1(L, m_MK[12], m_RK[12]); + L ^= F3(R, m_MK[11], m_RK[11]); + R ^= F2(L, m_MK[10], m_RK[10]); + L ^= F1(R, m_MK[ 9], m_RK[ 9]); + R ^= F3(L, m_MK[ 8], m_RK[ 8]); + L ^= F2(R, m_MK[ 7], m_RK[ 7]); + R ^= F1(L, m_MK[ 6], m_RK[ 6]); + L ^= F3(R, m_MK[ 5], m_RK[ 5]); + R ^= F2(L, m_MK[ 4], m_RK[ 4]); + L ^= F1(R, m_MK[ 3], m_RK[ 3]); + R ^= F3(L, m_MK[ 2], m_RK[ 2]); 
+ L ^= F2(R, m_MK[ 1], m_RK[ 1]); + R ^= F1(L, m_MK[ 0], m_RK[ 0]); + + store_be(out, R, L); + } + } + +/* +* CAST-128 Key Schedule: copies `length` key bytes into the 16-byte buffer key16, loads it via load_be as four 32-bit words X[0..3], then calls cast_ks() twice on the same X - first to fill m_MK, then to fill RK32, whose first 16 words are reduced mod 32 and stored in m_RK. cast_ks() overwrites X, so the second call starts from the state left by the first. NOTE(review): m_MK, m_RK and RK32 are sized 48 but only indices 0..15 are written here and in cast_ks() - confirm the larger size is intentional. NOTE(review): assumes length <= 16, presumably enforced by the caller - confirm. +*/ +void CAST_128::key_schedule(const uint8_t key[], size_t length) + { + m_MK.resize(48); + m_RK.resize(48); + + secure_vector<uint8_t> key16(16); + copy_mem(key16.data(), key, length); + + secure_vector<uint32_t> X(4); + for(size_t i = 0; i != 4; ++i) + X[i] = load_be<uint32_t>(key16.data(), i); + + cast_ks(m_MK, X); + + secure_vector<uint32_t> RK32(48); + cast_ks(RK32, X); + + for(size_t i = 0; i != 16; ++i) + m_RK[i] = RK32[i] % 32; + } + +void CAST_128::clear() /* calls zap() on both subkey vectors; decrypt_n() checks m_RK.empty() before running, so this presumably returns the cipher to the un-keyed state (zap() is defined elsewhere - confirm) */ + { + zap(m_MK); + zap(m_RK); + } + +/* +* S-Box Based Key Expansion: fills K[0..15]; each word is the XOR of five entries from the function-local tables S5..S8, indexed by single bytes of X and of a scratch vector Z. X is updated in place, so the caller's vector is modified on return. +*/ +void CAST_128::cast_ks(secure_vector<uint32_t>& K, + secure_vector<uint32_t>& X) + { + alignas(64) static const uint32_t S5[256] = { + 0x7EC90C04, 0x2C6E74B9, 0x9B0E66DF, 0xA6337911, 0xB86A7FFF, 0x1DD358F5, + 0x44DD9D44, 0x1731167F, 0x08FBF1FA, 0xE7F511CC, 0xD2051B00, 0x735ABA00, + 0x2AB722D8, 0x386381CB, 0xACF6243A, 0x69BEFD7A, 0xE6A2E77F, 0xF0C720CD, + 0xC4494816, 0xCCF5C180, 0x38851640, 0x15B0A848, 0xE68B18CB, 0x4CAADEFF, + 0x5F480A01, 0x0412B2AA, 0x259814FC, 0x41D0EFE2, 0x4E40B48D, 0x248EB6FB, + 0x8DBA1CFE, 0x41A99B02, 0x1A550A04, 0xBA8F65CB, 0x7251F4E7, 0x95A51725, + 0xC106ECD7, 0x97A5980A, 0xC539B9AA, 0x4D79FE6A, 0xF2F3F763, 0x68AF8040, + 0xED0C9E56, 0x11B4958B, 0xE1EB5A88, 0x8709E6B0, 0xD7E07156, 0x4E29FEA7, + 0x6366E52D, 0x02D1C000, 0xC4AC8E05, 0x9377F571, 0x0C05372A, 0x578535F2, + 0x2261BE02, 0xD642A0C9, 0xDF13A280, 0x74B55BD2, 0x682199C0, 0xD421E5EC, + 0x53FB3CE8, 0xC8ADEDB3, 0x28A87FC9, 0x3D959981, 0x5C1FF900, 0xFE38D399, + 0x0C4EFF0B, 0x062407EA, 0xAA2F4FB1, 0x4FB96976, 0x90C79505, 0xB0A8A774, + 0xEF55A1FF, 0xE59CA2C2, 0xA6B62D27, 0xE66A4263, 0xDF65001F, 0x0EC50966, + 0xDFDD55BC, 0x29DE0655, 0x911E739A, 0x17AF8975, 0x32C7911C, 0x89F89468, + 0x0D01E980, 0x524755F4, 0x03B63CC9, 0x0CC844B2, 0xBCF3F0AA, 0x87AC36E9, + 0xE53A7426, 0x01B3D82B, 0x1A9E7449, 0x64EE2D7E, 0xCDDBB1DA, 0x01C94910, +
0xB868BF80, 0x0D26F3FD, 0x9342EDE7, 0x04A5C284, 0x636737B6, 0x50F5B616, + 0xF24766E3, 0x8ECA36C1, 0x136E05DB, 0xFEF18391, 0xFB887A37, 0xD6E7F7D4, + 0xC7FB7DC9, 0x3063FCDF, 0xB6F589DE, 0xEC2941DA, 0x26E46695, 0xB7566419, + 0xF654EFC5, 0xD08D58B7, 0x48925401, 0xC1BACB7F, 0xE5FF550F, 0xB6083049, + 0x5BB5D0E8, 0x87D72E5A, 0xAB6A6EE1, 0x223A66CE, 0xC62BF3CD, 0x9E0885F9, + 0x68CB3E47, 0x086C010F, 0xA21DE820, 0xD18B69DE, 0xF3F65777, 0xFA02C3F6, + 0x407EDAC3, 0xCBB3D550, 0x1793084D, 0xB0D70EBA, 0x0AB378D5, 0xD951FB0C, + 0xDED7DA56, 0x4124BBE4, 0x94CA0B56, 0x0F5755D1, 0xE0E1E56E, 0x6184B5BE, + 0x580A249F, 0x94F74BC0, 0xE327888E, 0x9F7B5561, 0xC3DC0280, 0x05687715, + 0x646C6BD7, 0x44904DB3, 0x66B4F0A3, 0xC0F1648A, 0x697ED5AF, 0x49E92FF6, + 0x309E374F, 0x2CB6356A, 0x85808573, 0x4991F840, 0x76F0AE02, 0x083BE84D, + 0x28421C9A, 0x44489406, 0x736E4CB8, 0xC1092910, 0x8BC95FC6, 0x7D869CF4, + 0x134F616F, 0x2E77118D, 0xB31B2BE1, 0xAA90B472, 0x3CA5D717, 0x7D161BBA, + 0x9CAD9010, 0xAF462BA2, 0x9FE459D2, 0x45D34559, 0xD9F2DA13, 0xDBC65487, + 0xF3E4F94E, 0x176D486F, 0x097C13EA, 0x631DA5C7, 0x445F7382, 0x175683F4, + 0xCDC66A97, 0x70BE0288, 0xB3CDCF72, 0x6E5DD2F3, 0x20936079, 0x459B80A5, + 0xBE60E2DB, 0xA9C23101, 0xEBA5315C, 0x224E42F2, 0x1C5C1572, 0xF6721B2C, + 0x1AD2FFF3, 0x8C25404E, 0x324ED72F, 0x4067B7FD, 0x0523138E, 0x5CA3BC78, + 0xDC0FD66E, 0x75922283, 0x784D6B17, 0x58EBB16E, 0x44094F85, 0x3F481D87, + 0xFCFEAE7B, 0x77B5FF76, 0x8C2302BF, 0xAAF47556, 0x5F46B02A, 0x2B092801, + 0x3D38F5F7, 0x0CA81F36, 0x52AF4A8A, 0x66D5E7C0, 0xDF3B0874, 0x95055110, + 0x1B5AD7A8, 0xF61ED5AD, 0x6CF6E479, 0x20758184, 0xD0CEFA65, 0x88F7BE58, + 0x4A046826, 0x0FF6F8F3, 0xA09C7F70, 0x5346ABA0, 0x5CE96C28, 0xE176EDA3, + 0x6BAC307F, 0x376829D2, 0x85360FA9, 0x17E3FE2A, 0x24B79767, 0xF5A96B20, + 0xD6CD2595, 0x68FF1EBF, 0x7555442C, 0xF19F06BE, 0xF9E0659A, 0xEEB9491D, + 0x34010718, 0xBB30CAB8, 0xE822FE15, 0x88570983, 0x750E6249, 0xDA627E55, + 0x5E76FFA8, 0xB1534546, 0x6D47DE08, 0xEFE9E7D4 }; + + alignas(64) static 
const uint32_t S6[256] = { + 0xF6FA8F9D, 0x2CAC6CE1, 0x4CA34867, 0xE2337F7C, 0x95DB08E7, 0x016843B4, + 0xECED5CBC, 0x325553AC, 0xBF9F0960, 0xDFA1E2ED, 0x83F0579D, 0x63ED86B9, + 0x1AB6A6B8, 0xDE5EBE39, 0xF38FF732, 0x8989B138, 0x33F14961, 0xC01937BD, + 0xF506C6DA, 0xE4625E7E, 0xA308EA99, 0x4E23E33C, 0x79CBD7CC, 0x48A14367, + 0xA3149619, 0xFEC94BD5, 0xA114174A, 0xEAA01866, 0xA084DB2D, 0x09A8486F, + 0xA888614A, 0x2900AF98, 0x01665991, 0xE1992863, 0xC8F30C60, 0x2E78EF3C, + 0xD0D51932, 0xCF0FEC14, 0xF7CA07D2, 0xD0A82072, 0xFD41197E, 0x9305A6B0, + 0xE86BE3DA, 0x74BED3CD, 0x372DA53C, 0x4C7F4448, 0xDAB5D440, 0x6DBA0EC3, + 0x083919A7, 0x9FBAEED9, 0x49DBCFB0, 0x4E670C53, 0x5C3D9C01, 0x64BDB941, + 0x2C0E636A, 0xBA7DD9CD, 0xEA6F7388, 0xE70BC762, 0x35F29ADB, 0x5C4CDD8D, + 0xF0D48D8C, 0xB88153E2, 0x08A19866, 0x1AE2EAC8, 0x284CAF89, 0xAA928223, + 0x9334BE53, 0x3B3A21BF, 0x16434BE3, 0x9AEA3906, 0xEFE8C36E, 0xF890CDD9, + 0x80226DAE, 0xC340A4A3, 0xDF7E9C09, 0xA694A807, 0x5B7C5ECC, 0x221DB3A6, + 0x9A69A02F, 0x68818A54, 0xCEB2296F, 0x53C0843A, 0xFE893655, 0x25BFE68A, + 0xB4628ABC, 0xCF222EBF, 0x25AC6F48, 0xA9A99387, 0x53BDDB65, 0xE76FFBE7, + 0xE967FD78, 0x0BA93563, 0x8E342BC1, 0xE8A11BE9, 0x4980740D, 0xC8087DFC, + 0x8DE4BF99, 0xA11101A0, 0x7FD37975, 0xDA5A26C0, 0xE81F994F, 0x9528CD89, + 0xFD339FED, 0xB87834BF, 0x5F04456D, 0x22258698, 0xC9C4C83B, 0x2DC156BE, + 0x4F628DAA, 0x57F55EC5, 0xE2220ABE, 0xD2916EBF, 0x4EC75B95, 0x24F2C3C0, + 0x42D15D99, 0xCD0D7FA0, 0x7B6E27FF, 0xA8DC8AF0, 0x7345C106, 0xF41E232F, + 0x35162386, 0xE6EA8926, 0x3333B094, 0x157EC6F2, 0x372B74AF, 0x692573E4, + 0xE9A9D848, 0xF3160289, 0x3A62EF1D, 0xA787E238, 0xF3A5F676, 0x74364853, + 0x20951063, 0x4576698D, 0xB6FAD407, 0x592AF950, 0x36F73523, 0x4CFB6E87, + 0x7DA4CEC0, 0x6C152DAA, 0xCB0396A8, 0xC50DFE5D, 0xFCD707AB, 0x0921C42F, + 0x89DFF0BB, 0x5FE2BE78, 0x448F4F33, 0x754613C9, 0x2B05D08D, 0x48B9D585, + 0xDC049441, 0xC8098F9B, 0x7DEDE786, 0xC39A3373, 0x42410005, 0x6A091751, + 0x0EF3C8A6, 0x890072D6, 0x28207682, 
0xA9A9F7BE, 0xBF32679D, 0xD45B5B75, + 0xB353FD00, 0xCBB0E358, 0x830F220A, 0x1F8FB214, 0xD372CF08, 0xCC3C4A13, + 0x8CF63166, 0x061C87BE, 0x88C98F88, 0x6062E397, 0x47CF8E7A, 0xB6C85283, + 0x3CC2ACFB, 0x3FC06976, 0x4E8F0252, 0x64D8314D, 0xDA3870E3, 0x1E665459, + 0xC10908F0, 0x513021A5, 0x6C5B68B7, 0x822F8AA0, 0x3007CD3E, 0x74719EEF, + 0xDC872681, 0x073340D4, 0x7E432FD9, 0x0C5EC241, 0x8809286C, 0xF592D891, + 0x08A930F6, 0x957EF305, 0xB7FBFFBD, 0xC266E96F, 0x6FE4AC98, 0xB173ECC0, + 0xBC60B42A, 0x953498DA, 0xFBA1AE12, 0x2D4BD736, 0x0F25FAAB, 0xA4F3FCEB, + 0xE2969123, 0x257F0C3D, 0x9348AF49, 0x361400BC, 0xE8816F4A, 0x3814F200, + 0xA3F94043, 0x9C7A54C2, 0xBC704F57, 0xDA41E7F9, 0xC25AD33A, 0x54F4A084, + 0xB17F5505, 0x59357CBE, 0xEDBD15C8, 0x7F97C5AB, 0xBA5AC7B5, 0xB6F6DEAF, + 0x3A479C3A, 0x5302DA25, 0x653D7E6A, 0x54268D49, 0x51A477EA, 0x5017D55B, + 0xD7D25D88, 0x44136C76, 0x0404A8C8, 0xB8E5A121, 0xB81A928A, 0x60ED5869, + 0x97C55B96, 0xEAEC991B, 0x29935913, 0x01FDB7F1, 0x088E8DFA, 0x9AB6F6F5, + 0x3B4CBF9F, 0x4A5DE3AB, 0xE6051D35, 0xA0E1D855, 0xD36B4CF1, 0xF544EDEB, + 0xB0E93524, 0xBEBB8FBD, 0xA2D762CF, 0x49C92F54, 0x38B5F331, 0x7128A454, + 0x48392905, 0xA65B1DB8, 0x851C97BD, 0xD675CF2F }; + + alignas(64) static const uint32_t S7[256] = { + 0x85E04019, 0x332BF567, 0x662DBFFF, 0xCFC65693, 0x2A8D7F6F, 0xAB9BC912, + 0xDE6008A1, 0x2028DA1F, 0x0227BCE7, 0x4D642916, 0x18FAC300, 0x50F18B82, + 0x2CB2CB11, 0xB232E75C, 0x4B3695F2, 0xB28707DE, 0xA05FBCF6, 0xCD4181E9, + 0xE150210C, 0xE24EF1BD, 0xB168C381, 0xFDE4E789, 0x5C79B0D8, 0x1E8BFD43, + 0x4D495001, 0x38BE4341, 0x913CEE1D, 0x92A79C3F, 0x089766BE, 0xBAEEADF4, + 0x1286BECF, 0xB6EACB19, 0x2660C200, 0x7565BDE4, 0x64241F7A, 0x8248DCA9, + 0xC3B3AD66, 0x28136086, 0x0BD8DFA8, 0x356D1CF2, 0x107789BE, 0xB3B2E9CE, + 0x0502AA8F, 0x0BC0351E, 0x166BF52A, 0xEB12FF82, 0xE3486911, 0xD34D7516, + 0x4E7B3AFF, 0x5F43671B, 0x9CF6E037, 0x4981AC83, 0x334266CE, 0x8C9341B7, + 0xD0D854C0, 0xCB3A6C88, 0x47BC2829, 0x4725BA37, 0xA66AD22B, 0x7AD61F1E, + 
0x0C5CBAFA, 0x4437F107, 0xB6E79962, 0x42D2D816, 0x0A961288, 0xE1A5C06E, + 0x13749E67, 0x72FC081A, 0xB1D139F7, 0xF9583745, 0xCF19DF58, 0xBEC3F756, + 0xC06EBA30, 0x07211B24, 0x45C28829, 0xC95E317F, 0xBC8EC511, 0x38BC46E9, + 0xC6E6FA14, 0xBAE8584A, 0xAD4EBC46, 0x468F508B, 0x7829435F, 0xF124183B, + 0x821DBA9F, 0xAFF60FF4, 0xEA2C4E6D, 0x16E39264, 0x92544A8B, 0x009B4FC3, + 0xABA68CED, 0x9AC96F78, 0x06A5B79A, 0xB2856E6E, 0x1AEC3CA9, 0xBE838688, + 0x0E0804E9, 0x55F1BE56, 0xE7E5363B, 0xB3A1F25D, 0xF7DEBB85, 0x61FE033C, + 0x16746233, 0x3C034C28, 0xDA6D0C74, 0x79AAC56C, 0x3CE4E1AD, 0x51F0C802, + 0x98F8F35A, 0x1626A49F, 0xEED82B29, 0x1D382FE3, 0x0C4FB99A, 0xBB325778, + 0x3EC6D97B, 0x6E77A6A9, 0xCB658B5C, 0xD45230C7, 0x2BD1408B, 0x60C03EB7, + 0xB9068D78, 0xA33754F4, 0xF430C87D, 0xC8A71302, 0xB96D8C32, 0xEBD4E7BE, + 0xBE8B9D2D, 0x7979FB06, 0xE7225308, 0x8B75CF77, 0x11EF8DA4, 0xE083C858, + 0x8D6B786F, 0x5A6317A6, 0xFA5CF7A0, 0x5DDA0033, 0xF28EBFB0, 0xF5B9C310, + 0xA0EAC280, 0x08B9767A, 0xA3D9D2B0, 0x79D34217, 0x021A718D, 0x9AC6336A, + 0x2711FD60, 0x438050E3, 0x069908A8, 0x3D7FEDC4, 0x826D2BEF, 0x4EEB8476, + 0x488DCF25, 0x36C9D566, 0x28E74E41, 0xC2610ACA, 0x3D49A9CF, 0xBAE3B9DF, + 0xB65F8DE6, 0x92AEAF64, 0x3AC7D5E6, 0x9EA80509, 0xF22B017D, 0xA4173F70, + 0xDD1E16C3, 0x15E0D7F9, 0x50B1B887, 0x2B9F4FD5, 0x625ABA82, 0x6A017962, + 0x2EC01B9C, 0x15488AA9, 0xD716E740, 0x40055A2C, 0x93D29A22, 0xE32DBF9A, + 0x058745B9, 0x3453DC1E, 0xD699296E, 0x496CFF6F, 0x1C9F4986, 0xDFE2ED07, + 0xB87242D1, 0x19DE7EAE, 0x053E561A, 0x15AD6F8C, 0x66626C1C, 0x7154C24C, + 0xEA082B2A, 0x93EB2939, 0x17DCB0F0, 0x58D4F2AE, 0x9EA294FB, 0x52CF564C, + 0x9883FE66, 0x2EC40581, 0x763953C3, 0x01D6692E, 0xD3A0C108, 0xA1E7160E, + 0xE4F2DFA6, 0x693ED285, 0x74904698, 0x4C2B0EDD, 0x4F757656, 0x5D393378, + 0xA132234F, 0x3D321C5D, 0xC3F5E194, 0x4B269301, 0xC79F022F, 0x3C997E7E, + 0x5E4F9504, 0x3FFAFBBD, 0x76F7AD0E, 0x296693F4, 0x3D1FCE6F, 0xC61E45BE, + 0xD3B5AB34, 0xF72BF9B7, 0x1B0434C0, 0x4E72B567, 0x5592A33D, 0xB5229301, + 
0xCFD2A87F, 0x60AEB767, 0x1814386B, 0x30BCC33D, 0x38A0C07D, 0xFD1606F2, + 0xC363519B, 0x589DD390, 0x5479F8E6, 0x1CB8D647, 0x97FD61A9, 0xEA7759F4, + 0x2D57539D, 0x569A58CF, 0xE84E63AD, 0x462E1B78, 0x6580F87E, 0xF3817914, + 0x91DA55F4, 0x40A230F3, 0xD1988F35, 0xB6E318D2, 0x3FFA50BC, 0x3D40F021, + 0xC3C0BDAE, 0x4958C24C, 0x518F36B2, 0x84B1D370, 0x0FEDCE83, 0x878DDADA, + 0xF2A279C7, 0x94E01BE8, 0x90716F4B, 0x954B8AA3 }; + + alignas(64) static const uint32_t S8[256] = { + 0xE216300D, 0xBBDDFFFC, 0xA7EBDABD, 0x35648095, 0x7789F8B7, 0xE6C1121B, + 0x0E241600, 0x052CE8B5, 0x11A9CFB0, 0xE5952F11, 0xECE7990A, 0x9386D174, + 0x2A42931C, 0x76E38111, 0xB12DEF3A, 0x37DDDDFC, 0xDE9ADEB1, 0x0A0CC32C, + 0xBE197029, 0x84A00940, 0xBB243A0F, 0xB4D137CF, 0xB44E79F0, 0x049EEDFD, + 0x0B15A15D, 0x480D3168, 0x8BBBDE5A, 0x669DED42, 0xC7ECE831, 0x3F8F95E7, + 0x72DF191B, 0x7580330D, 0x94074251, 0x5C7DCDFA, 0xABBE6D63, 0xAA402164, + 0xB301D40A, 0x02E7D1CA, 0x53571DAE, 0x7A3182A2, 0x12A8DDEC, 0xFDAA335D, + 0x176F43E8, 0x71FB46D4, 0x38129022, 0xCE949AD4, 0xB84769AD, 0x965BD862, + 0x82F3D055, 0x66FB9767, 0x15B80B4E, 0x1D5B47A0, 0x4CFDE06F, 0xC28EC4B8, + 0x57E8726E, 0x647A78FC, 0x99865D44, 0x608BD593, 0x6C200E03, 0x39DC5FF6, + 0x5D0B00A3, 0xAE63AFF2, 0x7E8BD632, 0x70108C0C, 0xBBD35049, 0x2998DF04, + 0x980CF42A, 0x9B6DF491, 0x9E7EDD53, 0x06918548, 0x58CB7E07, 0x3B74EF2E, + 0x522FFFB1, 0xD24708CC, 0x1C7E27CD, 0xA4EB215B, 0x3CF1D2E2, 0x19B47A38, + 0x424F7618, 0x35856039, 0x9D17DEE7, 0x27EB35E6, 0xC9AFF67B, 0x36BAF5B8, + 0x09C467CD, 0xC18910B1, 0xE11DBF7B, 0x06CD1AF8, 0x7170C608, 0x2D5E3354, + 0xD4DE495A, 0x64C6D006, 0xBCC0C62C, 0x3DD00DB3, 0x708F8F34, 0x77D51B42, + 0x264F620F, 0x24B8D2BF, 0x15C1B79E, 0x46A52564, 0xF8D7E54E, 0x3E378160, + 0x7895CDA5, 0x859C15A5, 0xE6459788, 0xC37BC75F, 0xDB07BA0C, 0x0676A3AB, + 0x7F229B1E, 0x31842E7B, 0x24259FD7, 0xF8BEF472, 0x835FFCB8, 0x6DF4C1F2, + 0x96F5B195, 0xFD0AF0FC, 0xB0FE134C, 0xE2506D3D, 0x4F9B12EA, 0xF215F225, + 0xA223736F, 0x9FB4C428, 0x25D04979, 0x34C713F8, 
0xC4618187, 0xEA7A6E98, + 0x7CD16EFC, 0x1436876C, 0xF1544107, 0xBEDEEE14, 0x56E9AF27, 0xA04AA441, + 0x3CF7C899, 0x92ECBAE6, 0xDD67016D, 0x151682EB, 0xA842EEDF, 0xFDBA60B4, + 0xF1907B75, 0x20E3030F, 0x24D8C29E, 0xE139673B, 0xEFA63FB8, 0x71873054, + 0xB6F2CF3B, 0x9F326442, 0xCB15A4CC, 0xB01A4504, 0xF1E47D8D, 0x844A1BE5, + 0xBAE7DFDC, 0x42CBDA70, 0xCD7DAE0A, 0x57E85B7A, 0xD53F5AF6, 0x20CF4D8C, + 0xCEA4D428, 0x79D130A4, 0x3486EBFB, 0x33D3CDDC, 0x77853B53, 0x37EFFCB5, + 0xC5068778, 0xE580B3E6, 0x4E68B8F4, 0xC5C8B37E, 0x0D809EA2, 0x398FEB7C, + 0x132A4F94, 0x43B7950E, 0x2FEE7D1C, 0x223613BD, 0xDD06CAA2, 0x37DF932B, + 0xC4248289, 0xACF3EBC3, 0x5715F6B7, 0xEF3478DD, 0xF267616F, 0xC148CBE4, + 0x9052815E, 0x5E410FAB, 0xB48A2465, 0x2EDA7FA4, 0xE87B40E4, 0xE98EA084, + 0x5889E9E1, 0xEFD390FC, 0xDD07D35B, 0xDB485694, 0x38D7E5B2, 0x57720101, + 0x730EDEBC, 0x5B643113, 0x94917E4F, 0x503C2FBA, 0x646F1282, 0x7523D24A, + 0xE0779695, 0xF9C17A8F, 0x7A5B2121, 0xD187B896, 0x29263A4D, 0xBA510CDF, + 0x81F47C9F, 0xAD1163ED, 0xEA7B5965, 0x1A00726E, 0x11403092, 0x00DA6D77, + 0x4A0CDD61, 0xAD1F4603, 0x605BDFB0, 0x9EEDC364, 0x22EBE6A8, 0xCEE7D28A, + 0xA0E736A0, 0x5564A6B9, 0x10853209, 0xC7EB8F37, 0x2DE705CA, 0x8951570F, + 0xDF09822B, 0xBD691A6C, 0xAA12E4F2, 0x87451C0F, 0xE0F6A27A, 0x3ADA4819, + 0x4CF1764F, 0x0D771C2B, 0x67CDB156, 0x350D8384, 0x5938FA0F, 0x42399EF3, + 0x36997B07, 0x0E84093D, 0x4AA93E61, 0x8360D87B, 0x1FA98B0C, 0x1149382C, + 0xE97625A5, 0x0614D1B7, 0x0E25244B, 0x0C768347, 0x589E8D82, 0x0D2059D1, + 0xA466BB1E, 0xF8DA0A82, 0x04F19130, 0xBA6E4EC0, 0x99265164, 0x1EE7230D, + 0x50B2AD80, 0xEAEE6801, 0x8DB2A283, 0xEA8BF59E }; + + class ByteReader final + { + public: + uint8_t operator()(size_t i) const + { + return static_cast<uint8_t>(m_X[i/4] >> (8*(3 - (i%4)))); + } + + explicit ByteReader(const uint32_t* x) : m_X(x) {} + private: + const uint32_t* m_X; + }; + + secure_vector<uint32_t> Z(4); + ByteReader x(X.data()), z(Z.data()); + + Z[0] = X[0] ^ S5[x(13)] ^ S6[x(15)] ^ S7[x(12)] ^ 
S8[x(14)] ^ S7[x( 8)]; + Z[1] = X[2] ^ S5[z( 0)] ^ S6[z( 2)] ^ S7[z( 1)] ^ S8[z( 3)] ^ S8[x(10)]; + Z[2] = X[3] ^ S5[z( 7)] ^ S6[z( 6)] ^ S7[z( 5)] ^ S8[z( 4)] ^ S5[x( 9)]; + Z[3] = X[1] ^ S5[z(10)] ^ S6[z( 9)] ^ S7[z(11)] ^ S8[z( 8)] ^ S6[x(11)]; + K[ 0] = S5[z( 8)] ^ S6[z( 9)] ^ S7[z( 7)] ^ S8[z( 6)] ^ S5[z( 2)]; + K[ 1] = S5[z(10)] ^ S6[z(11)] ^ S7[z( 5)] ^ S8[z( 4)] ^ S6[z( 6)]; + K[ 2] = S5[z(12)] ^ S6[z(13)] ^ S7[z( 3)] ^ S8[z( 2)] ^ S7[z( 9)]; + K[ 3] = S5[z(14)] ^ S6[z(15)] ^ S7[z( 1)] ^ S8[z( 0)] ^ S8[z(12)]; + X[0] = Z[2] ^ S5[z( 5)] ^ S6[z( 7)] ^ S7[z( 4)] ^ S8[z( 6)] ^ S7[z( 0)]; + X[1] = Z[0] ^ S5[x( 0)] ^ S6[x( 2)] ^ S7[x( 1)] ^ S8[x( 3)] ^ S8[z( 2)]; + X[2] = Z[1] ^ S5[x( 7)] ^ S6[x( 6)] ^ S7[x( 5)] ^ S8[x( 4)] ^ S5[z( 1)]; + X[3] = Z[3] ^ S5[x(10)] ^ S6[x( 9)] ^ S7[x(11)] ^ S8[x( 8)] ^ S6[z( 3)]; + K[ 4] = S5[x( 3)] ^ S6[x( 2)] ^ S7[x(12)] ^ S8[x(13)] ^ S5[x( 8)]; + K[ 5] = S5[x( 1)] ^ S6[x( 0)] ^ S7[x(14)] ^ S8[x(15)] ^ S6[x(13)]; + K[ 6] = S5[x( 7)] ^ S6[x( 6)] ^ S7[x( 8)] ^ S8[x( 9)] ^ S7[x( 3)]; + K[ 7] = S5[x( 5)] ^ S6[x( 4)] ^ S7[x(10)] ^ S8[x(11)] ^ S8[x( 7)]; + Z[0] = X[0] ^ S5[x(13)] ^ S6[x(15)] ^ S7[x(12)] ^ S8[x(14)] ^ S7[x( 8)]; + Z[1] = X[2] ^ S5[z( 0)] ^ S6[z( 2)] ^ S7[z( 1)] ^ S8[z( 3)] ^ S8[x(10)]; + Z[2] = X[3] ^ S5[z( 7)] ^ S6[z( 6)] ^ S7[z( 5)] ^ S8[z( 4)] ^ S5[x( 9)]; + Z[3] = X[1] ^ S5[z(10)] ^ S6[z( 9)] ^ S7[z(11)] ^ S8[z( 8)] ^ S6[x(11)]; + K[ 8] = S5[z( 3)] ^ S6[z( 2)] ^ S7[z(12)] ^ S8[z(13)] ^ S5[z( 9)]; + K[ 9] = S5[z( 1)] ^ S6[z( 0)] ^ S7[z(14)] ^ S8[z(15)] ^ S6[z(12)]; + K[10] = S5[z( 7)] ^ S6[z( 6)] ^ S7[z( 8)] ^ S8[z( 9)] ^ S7[z( 2)]; + K[11] = S5[z( 5)] ^ S6[z( 4)] ^ S7[z(10)] ^ S8[z(11)] ^ S8[z( 6)]; + X[0] = Z[2] ^ S5[z( 5)] ^ S6[z( 7)] ^ S7[z( 4)] ^ S8[z( 6)] ^ S7[z( 0)]; + X[1] = Z[0] ^ S5[x( 0)] ^ S6[x( 2)] ^ S7[x( 1)] ^ S8[x( 3)] ^ S8[z( 2)]; + X[2] = Z[1] ^ S5[x( 7)] ^ S6[x( 6)] ^ S7[x( 5)] ^ S8[x( 4)] ^ S5[z( 1)]; + X[3] = Z[3] ^ S5[x(10)] ^ S6[x( 9)] ^ S7[x(11)] ^ S8[x( 8)] ^ S6[z( 3)]; + K[12] = S5[x( 8)] ^ 
S6[x( 9)] ^ S7[x( 7)] ^ S8[x( 6)] ^ S5[x( 3)]; + K[13] = S5[x(10)] ^ S6[x(11)] ^ S7[x( 5)] ^ S8[x( 4)] ^ S6[x( 7)]; + K[14] = S5[x(12)] ^ S6[x(13)] ^ S7[x( 3)] ^ S8[x( 2)] ^ S7[x( 8)]; + K[15] = S5[x(14)] ^ S6[x(15)] ^ S7[x( 1)] ^ S8[x( 0)] ^ S8[x(13)]; + } + +} diff --git a/comm/third_party/botan/src/lib/block/cast128/cast128.h b/comm/third_party/botan/src/lib/block/cast128/cast128.h new file mode 100644 index 0000000000..a5f2a64019 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast128/cast128.h @@ -0,0 +1,42 @@ +/* +* CAST-128 +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_CAST128_H_ +#define BOTAN_CAST128_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(cast128.h) + +namespace Botan { + +/** +* CAST-128 block cipher. The base-class arguments <8, 11, 16> are presumably the block size and the minimum/maximum key length in bytes (confirm against block_cipher.h). key_schedule() fills m_MK with 32-bit words and m_RK with values reduced mod 32; clear() zaps both vectors. +*/ +class BOTAN_PUBLIC_API(2,0) CAST_128 final : public Block_Cipher_Fixed_Params<8, 11, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "CAST-128"; } + BlockCipher* clone() const override { return new CAST_128; } + + private: + void key_schedule(const uint8_t[], size_t) override; + + static void cast_ks(secure_vector<uint32_t>& ks, + secure_vector<uint32_t>& user_key); + + secure_vector<uint32_t> m_MK; + secure_vector<uint8_t> m_RK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/cast128/cast_sboxes.h b/comm/third_party/botan/src/lib/block/cast128/cast_sboxes.h new file mode 100644 index 0000000000..f4d005cc94 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast128/cast_sboxes.h @@ -0,0 +1,197 @@ +/* +* S-Box Tables for CAST-128 and CAST-256 +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_CAST_SBOX_TABLES_H_ +#define BOTAN_CAST_SBOX_TABLES_H_ +
+#include <botan/types.h> + +namespace Botan { + +alignas(64) const uint32_t CAST_SBOX1[256] = { + 0x30FB40D4, 0x9FA0FF0B, 0x6BECCD2F, 0x3F258C7A, 0x1E213F2F, 0x9C004DD3, + 0x6003E540, 0xCF9FC949, 0xBFD4AF27, 0x88BBBDB5, 0xE2034090, 0x98D09675, + 0x6E63A0E0, 0x15C361D2, 0xC2E7661D, 0x22D4FF8E, 0x28683B6F, 0xC07FD059, + 0xFF2379C8, 0x775F50E2, 0x43C340D3, 0xDF2F8656, 0x887CA41A, 0xA2D2BD2D, + 0xA1C9E0D6, 0x346C4819, 0x61B76D87, 0x22540F2F, 0x2ABE32E1, 0xAA54166B, + 0x22568E3A, 0xA2D341D0, 0x66DB40C8, 0xA784392F, 0x004DFF2F, 0x2DB9D2DE, + 0x97943FAC, 0x4A97C1D8, 0x527644B7, 0xB5F437A7, 0xB82CBAEF, 0xD751D159, + 0x6FF7F0ED, 0x5A097A1F, 0x827B68D0, 0x90ECF52E, 0x22B0C054, 0xBC8E5935, + 0x4B6D2F7F, 0x50BB64A2, 0xD2664910, 0xBEE5812D, 0xB7332290, 0xE93B159F, + 0xB48EE411, 0x4BFF345D, 0xFD45C240, 0xAD31973F, 0xC4F6D02E, 0x55FC8165, + 0xD5B1CAAD, 0xA1AC2DAE, 0xA2D4B76D, 0xC19B0C50, 0x882240F2, 0x0C6E4F38, + 0xA4E4BFD7, 0x4F5BA272, 0x564C1D2F, 0xC59C5319, 0xB949E354, 0xB04669FE, + 0xB1B6AB8A, 0xC71358DD, 0x6385C545, 0x110F935D, 0x57538AD5, 0x6A390493, + 0xE63D37E0, 0x2A54F6B3, 0x3A787D5F, 0x6276A0B5, 0x19A6FCDF, 0x7A42206A, + 0x29F9D4D5, 0xF61B1891, 0xBB72275E, 0xAA508167, 0x38901091, 0xC6B505EB, + 0x84C7CB8C, 0x2AD75A0F, 0x874A1427, 0xA2D1936B, 0x2AD286AF, 0xAA56D291, + 0xD7894360, 0x425C750D, 0x93B39E26, 0x187184C9, 0x6C00B32D, 0x73E2BB14, + 0xA0BEBC3C, 0x54623779, 0x64459EAB, 0x3F328B82, 0x7718CF82, 0x59A2CEA6, + 0x04EE002E, 0x89FE78E6, 0x3FAB0950, 0x325FF6C2, 0x81383F05, 0x6963C5C8, + 0x76CB5AD6, 0xD49974C9, 0xCA180DCF, 0x380782D5, 0xC7FA5CF6, 0x8AC31511, + 0x35E79E13, 0x47DA91D0, 0xF40F9086, 0xA7E2419E, 0x31366241, 0x051EF495, + 0xAA573B04, 0x4A805D8D, 0x548300D0, 0x00322A3C, 0xBF64CDDF, 0xBA57A68E, + 0x75C6372B, 0x50AFD341, 0xA7C13275, 0x915A0BF5, 0x6B54BFAB, 0x2B0B1426, + 0xAB4CC9D7, 0x449CCD82, 0xF7FBF265, 0xAB85C5F3, 0x1B55DB94, 0xAAD4E324, + 0xCFA4BD3F, 0x2DEAA3E2, 0x9E204D02, 0xC8BD25AC, 0xEADF55B3, 0xD5BD9E98, + 0xE31231B2, 0x2AD5AD6C, 0x954329DE, 0xADBE4528, 
0xD8710F69, 0xAA51C90F, + 0xAA786BF6, 0x22513F1E, 0xAA51A79B, 0x2AD344CC, 0x7B5A41F0, 0xD37CFBAD, + 0x1B069505, 0x41ECE491, 0xB4C332E6, 0x032268D4, 0xC9600ACC, 0xCE387E6D, + 0xBF6BB16C, 0x6A70FB78, 0x0D03D9C9, 0xD4DF39DE, 0xE01063DA, 0x4736F464, + 0x5AD328D8, 0xB347CC96, 0x75BB0FC3, 0x98511BFB, 0x4FFBCC35, 0xB58BCF6A, + 0xE11F0ABC, 0xBFC5FE4A, 0xA70AEC10, 0xAC39570A, 0x3F04442F, 0x6188B153, + 0xE0397A2E, 0x5727CB79, 0x9CEB418F, 0x1CACD68D, 0x2AD37C96, 0x0175CB9D, + 0xC69DFF09, 0xC75B65F0, 0xD9DB40D8, 0xEC0E7779, 0x4744EAD4, 0xB11C3274, + 0xDD24CB9E, 0x7E1C54BD, 0xF01144F9, 0xD2240EB1, 0x9675B3FD, 0xA3AC3755, + 0xD47C27AF, 0x51C85F4D, 0x56907596, 0xA5BB15E6, 0x580304F0, 0xCA042CF1, + 0x011A37EA, 0x8DBFAADB, 0x35BA3E4A, 0x3526FFA0, 0xC37B4D09, 0xBC306ED9, + 0x98A52666, 0x5648F725, 0xFF5E569D, 0x0CED63D0, 0x7C63B2CF, 0x700B45E1, + 0xD5EA50F1, 0x85A92872, 0xAF1FBDA7, 0xD4234870, 0xA7870BF3, 0x2D3B4D79, + 0x42E04198, 0x0CD0EDE7, 0x26470DB8, 0xF881814C, 0x474D6AD7, 0x7C0C5E5C, + 0xD1231959, 0x381B7298, 0xF5D2F4DB, 0xAB838653, 0x6E2F1E23, 0x83719C9E, + 0xBD91E046, 0x9A56456E, 0xDC39200C, 0x20C8C571, 0x962BDA1C, 0xE1E696FF, + 0xB141AB08, 0x7CCA89B9, 0x1A69E783, 0x02CC4843, 0xA2F7C579, 0x429EF47D, + 0x427B169C, 0x5AC9F049, 0xDD8F0F00, 0x5C8165BF }; + +alignas(64) const uint32_t CAST_SBOX2[256] = { + 0x1F201094, 0xEF0BA75B, 0x69E3CF7E, 0x393F4380, 0xFE61CF7A, 0xEEC5207A, + 0x55889C94, 0x72FC0651, 0xADA7EF79, 0x4E1D7235, 0xD55A63CE, 0xDE0436BA, + 0x99C430EF, 0x5F0C0794, 0x18DCDB7D, 0xA1D6EFF3, 0xA0B52F7B, 0x59E83605, + 0xEE15B094, 0xE9FFD909, 0xDC440086, 0xEF944459, 0xBA83CCB3, 0xE0C3CDFB, + 0xD1DA4181, 0x3B092AB1, 0xF997F1C1, 0xA5E6CF7B, 0x01420DDB, 0xE4E7EF5B, + 0x25A1FF41, 0xE180F806, 0x1FC41080, 0x179BEE7A, 0xD37AC6A9, 0xFE5830A4, + 0x98DE8B7F, 0x77E83F4E, 0x79929269, 0x24FA9F7B, 0xE113C85B, 0xACC40083, + 0xD7503525, 0xF7EA615F, 0x62143154, 0x0D554B63, 0x5D681121, 0xC866C359, + 0x3D63CF73, 0xCEE234C0, 0xD4D87E87, 0x5C672B21, 0x071F6181, 0x39F7627F, + 0x361E3084, 
0xE4EB573B, 0x602F64A4, 0xD63ACD9C, 0x1BBC4635, 0x9E81032D, + 0x2701F50C, 0x99847AB4, 0xA0E3DF79, 0xBA6CF38C, 0x10843094, 0x2537A95E, + 0xF46F6FFE, 0xA1FF3B1F, 0x208CFB6A, 0x8F458C74, 0xD9E0A227, 0x4EC73A34, + 0xFC884F69, 0x3E4DE8DF, 0xEF0E0088, 0x3559648D, 0x8A45388C, 0x1D804366, + 0x721D9BFD, 0xA58684BB, 0xE8256333, 0x844E8212, 0x128D8098, 0xFED33FB4, + 0xCE280AE1, 0x27E19BA5, 0xD5A6C252, 0xE49754BD, 0xC5D655DD, 0xEB667064, + 0x77840B4D, 0xA1B6A801, 0x84DB26A9, 0xE0B56714, 0x21F043B7, 0xE5D05860, + 0x54F03084, 0x066FF472, 0xA31AA153, 0xDADC4755, 0xB5625DBF, 0x68561BE6, + 0x83CA6B94, 0x2D6ED23B, 0xECCF01DB, 0xA6D3D0BA, 0xB6803D5C, 0xAF77A709, + 0x33B4A34C, 0x397BC8D6, 0x5EE22B95, 0x5F0E5304, 0x81ED6F61, 0x20E74364, + 0xB45E1378, 0xDE18639B, 0x881CA122, 0xB96726D1, 0x8049A7E8, 0x22B7DA7B, + 0x5E552D25, 0x5272D237, 0x79D2951C, 0xC60D894C, 0x488CB402, 0x1BA4FE5B, + 0xA4B09F6B, 0x1CA815CF, 0xA20C3005, 0x8871DF63, 0xB9DE2FCB, 0x0CC6C9E9, + 0x0BEEFF53, 0xE3214517, 0xB4542835, 0x9F63293C, 0xEE41E729, 0x6E1D2D7C, + 0x50045286, 0x1E6685F3, 0xF33401C6, 0x30A22C95, 0x31A70850, 0x60930F13, + 0x73F98417, 0xA1269859, 0xEC645C44, 0x52C877A9, 0xCDFF33A6, 0xA02B1741, + 0x7CBAD9A2, 0x2180036F, 0x50D99C08, 0xCB3F4861, 0xC26BD765, 0x64A3F6AB, + 0x80342676, 0x25A75E7B, 0xE4E6D1FC, 0x20C710E6, 0xCDF0B680, 0x17844D3B, + 0x31EEF84D, 0x7E0824E4, 0x2CCB49EB, 0x846A3BAE, 0x8FF77888, 0xEE5D60F6, + 0x7AF75673, 0x2FDD5CDB, 0xA11631C1, 0x30F66F43, 0xB3FAEC54, 0x157FD7FA, + 0xEF8579CC, 0xD152DE58, 0xDB2FFD5E, 0x8F32CE19, 0x306AF97A, 0x02F03EF8, + 0x99319AD5, 0xC242FA0F, 0xA7E3EBB0, 0xC68E4906, 0xB8DA230C, 0x80823028, + 0xDCDEF3C8, 0xD35FB171, 0x088A1BC8, 0xBEC0C560, 0x61A3C9E8, 0xBCA8F54D, + 0xC72FEFFA, 0x22822E99, 0x82C570B4, 0xD8D94E89, 0x8B1C34BC, 0x301E16E6, + 0x273BE979, 0xB0FFEAA6, 0x61D9B8C6, 0x00B24869, 0xB7FFCE3F, 0x08DC283B, + 0x43DAF65A, 0xF7E19798, 0x7619B72F, 0x8F1C9BA4, 0xDC8637A0, 0x16A7D3B1, + 0x9FC393B7, 0xA7136EEB, 0xC6BCC63E, 0x1A513742, 0xEF6828BC, 0x520365D6, + 0x2D6A77AB, 
0x3527ED4B, 0x821FD216, 0x095C6E2E, 0xDB92F2FB, 0x5EEA29CB, + 0x145892F5, 0x91584F7F, 0x5483697B, 0x2667A8CC, 0x85196048, 0x8C4BACEA, + 0x833860D4, 0x0D23E0F9, 0x6C387E8A, 0x0AE6D249, 0xB284600C, 0xD835731D, + 0xDCB1C647, 0xAC4C56EA, 0x3EBD81B3, 0x230EABB0, 0x6438BC87, 0xF0B5B1FA, + 0x8F5EA2B3, 0xFC184642, 0x0A036B7A, 0x4FB089BD, 0x649DA589, 0xA345415E, + 0x5C038323, 0x3E5D3BB9, 0x43D79572, 0x7E6DD07C, 0x06DFDF1E, 0x6C6CC4EF, + 0x7160A539, 0x73BFBE70, 0x83877605, 0x4523ECF1 }; + +alignas(64) const uint32_t CAST_SBOX3[256] = { + 0x8DEFC240, 0x25FA5D9F, 0xEB903DBF, 0xE810C907, 0x47607FFF, 0x369FE44B, + 0x8C1FC644, 0xAECECA90, 0xBEB1F9BF, 0xEEFBCAEA, 0xE8CF1950, 0x51DF07AE, + 0x920E8806, 0xF0AD0548, 0xE13C8D83, 0x927010D5, 0x11107D9F, 0x07647DB9, + 0xB2E3E4D4, 0x3D4F285E, 0xB9AFA820, 0xFADE82E0, 0xA067268B, 0x8272792E, + 0x553FB2C0, 0x489AE22B, 0xD4EF9794, 0x125E3FBC, 0x21FFFCEE, 0x825B1BFD, + 0x9255C5ED, 0x1257A240, 0x4E1A8302, 0xBAE07FFF, 0x528246E7, 0x8E57140E, + 0x3373F7BF, 0x8C9F8188, 0xA6FC4EE8, 0xC982B5A5, 0xA8C01DB7, 0x579FC264, + 0x67094F31, 0xF2BD3F5F, 0x40FFF7C1, 0x1FB78DFC, 0x8E6BD2C1, 0x437BE59B, + 0x99B03DBF, 0xB5DBC64B, 0x638DC0E6, 0x55819D99, 0xA197C81C, 0x4A012D6E, + 0xC5884A28, 0xCCC36F71, 0xB843C213, 0x6C0743F1, 0x8309893C, 0x0FEDDD5F, + 0x2F7FE850, 0xD7C07F7E, 0x02507FBF, 0x5AFB9A04, 0xA747D2D0, 0x1651192E, + 0xAF70BF3E, 0x58C31380, 0x5F98302E, 0x727CC3C4, 0x0A0FB402, 0x0F7FEF82, + 0x8C96FDAD, 0x5D2C2AAE, 0x8EE99A49, 0x50DA88B8, 0x8427F4A0, 0x1EAC5790, + 0x796FB449, 0x8252DC15, 0xEFBD7D9B, 0xA672597D, 0xADA840D8, 0x45F54504, + 0xFA5D7403, 0xE83EC305, 0x4F91751A, 0x925669C2, 0x23EFE941, 0xA903F12E, + 0x60270DF2, 0x0276E4B6, 0x94FD6574, 0x927985B2, 0x8276DBCB, 0x02778176, + 0xF8AF918D, 0x4E48F79E, 0x8F616DDF, 0xE29D840E, 0x842F7D83, 0x340CE5C8, + 0x96BBB682, 0x93B4B148, 0xEF303CAB, 0x984FAF28, 0x779FAF9B, 0x92DC560D, + 0x224D1E20, 0x8437AA88, 0x7D29DC96, 0x2756D3DC, 0x8B907CEE, 0xB51FD240, + 0xE7C07CE3, 0xE566B4A1, 0xC3E9615E, 0x3CF8209D, 0x6094D1E3, 
0xCD9CA341, + 0x5C76460E, 0x00EA983B, 0xD4D67881, 0xFD47572C, 0xF76CEDD9, 0xBDA8229C, + 0x127DADAA, 0x438A074E, 0x1F97C090, 0x081BDB8A, 0x93A07EBE, 0xB938CA15, + 0x97B03CFF, 0x3DC2C0F8, 0x8D1AB2EC, 0x64380E51, 0x68CC7BFB, 0xD90F2788, + 0x12490181, 0x5DE5FFD4, 0xDD7EF86A, 0x76A2E214, 0xB9A40368, 0x925D958F, + 0x4B39FFFA, 0xBA39AEE9, 0xA4FFD30B, 0xFAF7933B, 0x6D498623, 0x193CBCFA, + 0x27627545, 0x825CF47A, 0x61BD8BA0, 0xD11E42D1, 0xCEAD04F4, 0x127EA392, + 0x10428DB7, 0x8272A972, 0x9270C4A8, 0x127DE50B, 0x285BA1C8, 0x3C62F44F, + 0x35C0EAA5, 0xE805D231, 0x428929FB, 0xB4FCDF82, 0x4FB66A53, 0x0E7DC15B, + 0x1F081FAB, 0x108618AE, 0xFCFD086D, 0xF9FF2889, 0x694BCC11, 0x236A5CAE, + 0x12DECA4D, 0x2C3F8CC5, 0xD2D02DFE, 0xF8EF5896, 0xE4CF52DA, 0x95155B67, + 0x494A488C, 0xB9B6A80C, 0x5C8F82BC, 0x89D36B45, 0x3A609437, 0xEC00C9A9, + 0x44715253, 0x0A874B49, 0xD773BC40, 0x7C34671C, 0x02717EF6, 0x4FEB5536, + 0xA2D02FFF, 0xD2BF60C4, 0xD43F03C0, 0x50B4EF6D, 0x07478CD1, 0x006E1888, + 0xA2E53F55, 0xB9E6D4BC, 0xA2048016, 0x97573833, 0xD7207D67, 0xDE0F8F3D, + 0x72F87B33, 0xABCC4F33, 0x7688C55D, 0x7B00A6B0, 0x947B0001, 0x570075D2, + 0xF9BB88F8, 0x8942019E, 0x4264A5FF, 0x856302E0, 0x72DBD92B, 0xEE971B69, + 0x6EA22FDE, 0x5F08AE2B, 0xAF7A616D, 0xE5C98767, 0xCF1FEBD2, 0x61EFC8C2, + 0xF1AC2571, 0xCC8239C2, 0x67214CB8, 0xB1E583D1, 0xB7DC3E62, 0x7F10BDCE, + 0xF90A5C38, 0x0FF0443D, 0x606E6DC6, 0x60543A49, 0x5727C148, 0x2BE98A1D, + 0x8AB41738, 0x20E1BE24, 0xAF96DA0F, 0x68458425, 0x99833BE5, 0x600D457D, + 0x282F9350, 0x8334B362, 0xD91D1120, 0x2B6D8DA0, 0x642B1E31, 0x9C305A00, + 0x52BCE688, 0x1B03588A, 0xF7BAEFD5, 0x4142ED9C, 0xA4315C11, 0x83323EC5, + 0xDFEF4636, 0xA133C501, 0xE9D3531C, 0xEE353783 }; + +alignas(64) const uint32_t CAST_SBOX4[256] = { + 0x9DB30420, 0x1FB6E9DE, 0xA7BE7BEF, 0xD273A298, 0x4A4F7BDB, 0x64AD8C57, + 0x85510443, 0xFA020ED1, 0x7E287AFF, 0xE60FB663, 0x095F35A1, 0x79EBF120, + 0xFD059D43, 0x6497B7B1, 0xF3641F63, 0x241E4ADF, 0x28147F5F, 0x4FA2B8CD, + 0xC9430040, 0x0CC32220, 
0xFDD30B30, 0xC0A5374F, 0x1D2D00D9, 0x24147B15, + 0xEE4D111A, 0x0FCA5167, 0x71FF904C, 0x2D195FFE, 0x1A05645F, 0x0C13FEFE, + 0x081B08CA, 0x05170121, 0x80530100, 0xE83E5EFE, 0xAC9AF4F8, 0x7FE72701, + 0xD2B8EE5F, 0x06DF4261, 0xBB9E9B8A, 0x7293EA25, 0xCE84FFDF, 0xF5718801, + 0x3DD64B04, 0xA26F263B, 0x7ED48400, 0x547EEBE6, 0x446D4CA0, 0x6CF3D6F5, + 0x2649ABDF, 0xAEA0C7F5, 0x36338CC1, 0x503F7E93, 0xD3772061, 0x11B638E1, + 0x72500E03, 0xF80EB2BB, 0xABE0502E, 0xEC8D77DE, 0x57971E81, 0xE14F6746, + 0xC9335400, 0x6920318F, 0x081DBB99, 0xFFC304A5, 0x4D351805, 0x7F3D5CE3, + 0xA6C866C6, 0x5D5BCCA9, 0xDAEC6FEA, 0x9F926F91, 0x9F46222F, 0x3991467D, + 0xA5BF6D8E, 0x1143C44F, 0x43958302, 0xD0214EEB, 0x022083B8, 0x3FB6180C, + 0x18F8931E, 0x281658E6, 0x26486E3E, 0x8BD78A70, 0x7477E4C1, 0xB506E07C, + 0xF32D0A25, 0x79098B02, 0xE4EABB81, 0x28123B23, 0x69DEAD38, 0x1574CA16, + 0xDF871B62, 0x211C40B7, 0xA51A9EF9, 0x0014377B, 0x041E8AC8, 0x09114003, + 0xBD59E4D2, 0xE3D156D5, 0x4FE876D5, 0x2F91A340, 0x557BE8DE, 0x00EAE4A7, + 0x0CE5C2EC, 0x4DB4BBA6, 0xE756BDFF, 0xDD3369AC, 0xEC17B035, 0x06572327, + 0x99AFC8B0, 0x56C8C391, 0x6B65811C, 0x5E146119, 0x6E85CB75, 0xBE07C002, + 0xC2325577, 0x893FF4EC, 0x5BBFC92D, 0xD0EC3B25, 0xB7801AB7, 0x8D6D3B24, + 0x20C763EF, 0xC366A5FC, 0x9C382880, 0x0ACE3205, 0xAAC9548A, 0xECA1D7C7, + 0x041AFA32, 0x1D16625A, 0x6701902C, 0x9B757A54, 0x31D477F7, 0x9126B031, + 0x36CC6FDB, 0xC70B8B46, 0xD9E66A48, 0x56E55A79, 0x026A4CEB, 0x52437EFF, + 0x2F8F76B4, 0x0DF980A5, 0x8674CDE3, 0xEDDA04EB, 0x17A9BE04, 0x2C18F4DF, + 0xB7747F9D, 0xAB2AF7B4, 0xEFC34D20, 0x2E096B7C, 0x1741A254, 0xE5B6A035, + 0x213D42F6, 0x2C1C7C26, 0x61C2F50F, 0x6552DAF9, 0xD2C231F8, 0x25130F69, + 0xD8167FA2, 0x0418F2C8, 0x001A96A6, 0x0D1526AB, 0x63315C21, 0x5E0A72EC, + 0x49BAFEFD, 0x187908D9, 0x8D0DBD86, 0x311170A7, 0x3E9B640C, 0xCC3E10D7, + 0xD5CAD3B6, 0x0CAEC388, 0xF73001E1, 0x6C728AFF, 0x71EAE2A1, 0x1F9AF36E, + 0xCFCBD12F, 0xC1DE8417, 0xAC07BE6B, 0xCB44A1D8, 0x8B9B0F56, 0x013988C3, + 0xB1C52FCA, 0xB4BE31CD, 
0xD8782806, 0x12A3A4E2, 0x6F7DE532, 0x58FD7EB6, + 0xD01EE900, 0x24ADFFC2, 0xF4990FC5, 0x9711AAC5, 0x001D7B95, 0x82E5E7D2, + 0x109873F6, 0x00613096, 0xC32D9521, 0xADA121FF, 0x29908415, 0x7FBB977F, + 0xAF9EB3DB, 0x29C9ED2A, 0x5CE2A465, 0xA730F32C, 0xD0AA3FE8, 0x8A5CC091, + 0xD49E2CE7, 0x0CE454A9, 0xD60ACD86, 0x015F1919, 0x77079103, 0xDEA03AF6, + 0x78A8565E, 0xDEE356DF, 0x21F05CBE, 0x8B75E387, 0xB3C50651, 0xB8A5C3EF, + 0xD8EEB6D2, 0xE523BE77, 0xC2154529, 0x2F69EFDF, 0xAFE67AFB, 0xF470C4B2, + 0xF3E0EB5B, 0xD6CC9876, 0x39E4460C, 0x1FDA8538, 0x1987832F, 0xCA007367, + 0xA99144F8, 0x296B299E, 0x492FC295, 0x9266BEAB, 0xB5676E69, 0x9BD3DDDA, + 0xDF7E052F, 0xDB25701C, 0x1B5E51EE, 0xF65324E6, 0x6AFCE36C, 0x0316CC04, + 0x8644213E, 0xB7DC59D0, 0x7965291F, 0xCCD6FD43, 0x41823979, 0x932BCDF6, + 0xB657C34D, 0x4EDFD282, 0x7AE5290C, 0x3CB9536B, 0x851E20FE, 0x9833557E, + 0x13ECF0B0, 0xD3FFB372, 0x3F85C5C1, 0x0AEF7ED2 }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/cast128/info.txt b/comm/third_party/botan/src/lib/block/cast128/info.txt new file mode 100644 index 0000000000..6e6cf30abe --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast128/info.txt @@ -0,0 +1,12 @@ +<defines> +CAST -> 20131128 +CAST_128 -> 20171203 +</defines> + +<header:internal> +cast_sboxes.h +</header:internal> + +<header:public> +cast128.h +</header:public> diff --git a/comm/third_party/botan/src/lib/block/cast256/cast256.cpp b/comm/third_party/botan/src/lib/block/cast256/cast256.cpp new file mode 100644 index 0000000000..226955f7cb --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast256/cast256.cpp @@ -0,0 +1,232 @@ +/* +* CAST-256 +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/cast256.h> +#include <botan/internal/cast_sboxes.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +namespace { + +/* +* CAST-256 Round Type 1 +*/ +void round1(uint32_t& out, uint32_t in, 
uint32_t MK, uint32_t RK) /* T = rotl_var(MK + in, RK); out ^= ((S1 ^ S2) - S3) + S4, where Sk = CAST_SBOXk[get_byte(k-1, T)] and the arithmetic is on uint32_t */ + { + const uint32_t T = rotl_var(MK + in, RK); + out ^= (CAST_SBOX1[get_byte(0, T)] ^ CAST_SBOX2[get_byte(1, T)]) - + CAST_SBOX3[get_byte(2, T)] + CAST_SBOX4[get_byte(3, T)]; + } + +/* +* CAST-256 Round Type 2: T = rotl_var(MK ^ in, RK); out ^= ((S1 - S2) + S3) ^ S4, where Sk = CAST_SBOXk[get_byte(k-1, T)] and the arithmetic is on uint32_t. +*/ +void round2(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK) + { + const uint32_t T = rotl_var(MK ^ in, RK); + out ^= (CAST_SBOX1[get_byte(0, T)] - CAST_SBOX2[get_byte(1, T)] + + CAST_SBOX3[get_byte(2, T)]) ^ CAST_SBOX4[get_byte(3, T)]; + } + +/* +* CAST-256 Round Type 3: T = rotl_var(MK - in, RK); out ^= ((S1 + S2) ^ S3) - S4, where Sk = CAST_SBOXk[get_byte(k-1, T)] and the arithmetic is on uint32_t. +*/ +void round3(uint32_t& out, uint32_t in, uint32_t MK, uint32_t RK) + { + const uint32_t T = rotl_var(MK - in, RK); + out ^= ((CAST_SBOX1[get_byte(0, T)] + CAST_SBOX2[get_byte(1, T)]) ^ + CAST_SBOX3[get_byte(2, T)]) - CAST_SBOX4[get_byte(3, T)]; + } + +} + +/* +* CAST-256 Encryption: after verify_key_set() on m_RK, each of the `blocks` input blocks is loaded via load_be as four 32-bit words A..D, run through 48 round calls, and written back with store_be; in and out advance by BLOCK_SIZE per block. Subkeys 0..23 are used in ascending order, four at a time, as round1(C,D), round2(B,C), round3(A,B), round1(D,A). Subkeys 24..47 are then used group by group (24..27 first, 44..47 last), each group in descending index order as round1(D,A), round3(A,B), round2(B,C), round1(C,D). +*/ +void CAST_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + + round1(C, D, m_MK[ 0], m_RK[ 0]); round2(B, C, m_MK[ 1], m_RK[ 1]); + round3(A, B, m_MK[ 2], m_RK[ 2]); round1(D, A, m_MK[ 3], m_RK[ 3]); + round1(C, D, m_MK[ 4], m_RK[ 4]); round2(B, C, m_MK[ 5], m_RK[ 5]); + round3(A, B, m_MK[ 6], m_RK[ 6]); round1(D, A, m_MK[ 7], m_RK[ 7]); + round1(C, D, m_MK[ 8], m_RK[ 8]); round2(B, C, m_MK[ 9], m_RK[ 9]); + round3(A, B, m_MK[10], m_RK[10]); round1(D, A, m_MK[11], m_RK[11]); + round1(C, D, m_MK[12], m_RK[12]); round2(B, C, m_MK[13], m_RK[13]); + round3(A, B, m_MK[14], m_RK[14]); round1(D, A, m_MK[15], m_RK[15]); + round1(C, D, m_MK[16], m_RK[16]); round2(B, C, m_MK[17], m_RK[17]); + round3(A, B, m_MK[18], m_RK[18]); round1(D, A, m_MK[19], m_RK[19]); + round1(C, D, m_MK[20], m_RK[20]); round2(B, C, m_MK[21], m_RK[21]); + round3(A, B, m_MK[22], m_RK[22]); round1(D, A, m_MK[23], m_RK[23]); + round1(D,
A, m_MK[27], m_RK[27]); round3(A, B, m_MK[26], m_RK[26]); + round2(B, C, m_MK[25], m_RK[25]); round1(C, D, m_MK[24], m_RK[24]); + round1(D, A, m_MK[31], m_RK[31]); round3(A, B, m_MK[30], m_RK[30]); + round2(B, C, m_MK[29], m_RK[29]); round1(C, D, m_MK[28], m_RK[28]); + round1(D, A, m_MK[35], m_RK[35]); round3(A, B, m_MK[34], m_RK[34]); + round2(B, C, m_MK[33], m_RK[33]); round1(C, D, m_MK[32], m_RK[32]); + round1(D, A, m_MK[39], m_RK[39]); round3(A, B, m_MK[38], m_RK[38]); + round2(B, C, m_MK[37], m_RK[37]); round1(C, D, m_MK[36], m_RK[36]); + round1(D, A, m_MK[43], m_RK[43]); round3(A, B, m_MK[42], m_RK[42]); + round2(B, C, m_MK[41], m_RK[41]); round1(C, D, m_MK[40], m_RK[40]); + round1(D, A, m_MK[47], m_RK[47]); round3(A, B, m_MK[46], m_RK[46]); + round2(B, C, m_MK[45], m_RK[45]); round1(C, D, m_MK[44], m_RK[44]); + + store_be(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* CAST-256 Decryption +*/ +void CAST_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + + round1(C, D, m_MK[44], m_RK[44]); round2(B, C, m_MK[45], m_RK[45]); + round3(A, B, m_MK[46], m_RK[46]); round1(D, A, m_MK[47], m_RK[47]); + round1(C, D, m_MK[40], m_RK[40]); round2(B, C, m_MK[41], m_RK[41]); + round3(A, B, m_MK[42], m_RK[42]); round1(D, A, m_MK[43], m_RK[43]); + round1(C, D, m_MK[36], m_RK[36]); round2(B, C, m_MK[37], m_RK[37]); + round3(A, B, m_MK[38], m_RK[38]); round1(D, A, m_MK[39], m_RK[39]); + round1(C, D, m_MK[32], m_RK[32]); round2(B, C, m_MK[33], m_RK[33]); + round3(A, B, m_MK[34], m_RK[34]); round1(D, A, m_MK[35], m_RK[35]); + round1(C, D, m_MK[28], m_RK[28]); round2(B, C, m_MK[29], m_RK[29]); + round3(A, B, m_MK[30], m_RK[30]); round1(D, A, m_MK[31], m_RK[31]); + round1(C, D, 
m_MK[24], m_RK[24]); round2(B, C, m_MK[25], m_RK[25]); + round3(A, B, m_MK[26], m_RK[26]); round1(D, A, m_MK[27], m_RK[27]); + round1(D, A, m_MK[23], m_RK[23]); round3(A, B, m_MK[22], m_RK[22]); + round2(B, C, m_MK[21], m_RK[21]); round1(C, D, m_MK[20], m_RK[20]); + round1(D, A, m_MK[19], m_RK[19]); round3(A, B, m_MK[18], m_RK[18]); + round2(B, C, m_MK[17], m_RK[17]); round1(C, D, m_MK[16], m_RK[16]); + round1(D, A, m_MK[15], m_RK[15]); round3(A, B, m_MK[14], m_RK[14]); + round2(B, C, m_MK[13], m_RK[13]); round1(C, D, m_MK[12], m_RK[12]); + round1(D, A, m_MK[11], m_RK[11]); round3(A, B, m_MK[10], m_RK[10]); + round2(B, C, m_MK[ 9], m_RK[ 9]); round1(C, D, m_MK[ 8], m_RK[ 8]); + round1(D, A, m_MK[ 7], m_RK[ 7]); round3(A, B, m_MK[ 6], m_RK[ 6]); + round2(B, C, m_MK[ 5], m_RK[ 5]); round1(C, D, m_MK[ 4], m_RK[ 4]); + round1(D, A, m_MK[ 3], m_RK[ 3]); round3(A, B, m_MK[ 2], m_RK[ 2]); + round2(B, C, m_MK[ 1], m_RK[ 1]); round1(C, D, m_MK[ 0], m_RK[ 0]); + + store_be(out, A, B, C, D); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* CAST-256 Key Schedule +*/ +void CAST_256::key_schedule(const uint8_t key[], size_t length) + { + static const uint32_t KEY_MASK[192] = { + 0x5A827999, 0xC95C653A, 0x383650DB, 0xA7103C7C, 0x15EA281D, 0x84C413BE, + 0xF39DFF5F, 0x6277EB00, 0xD151D6A1, 0x402BC242, 0xAF05ADE3, 0x1DDF9984, + 0x8CB98525, 0xFB9370C6, 0x6A6D5C67, 0xD9474808, 0x482133A9, 0xB6FB1F4A, + 0x25D50AEB, 0x94AEF68C, 0x0388E22D, 0x7262CDCE, 0xE13CB96F, 0x5016A510, + 0xBEF090B1, 0x2DCA7C52, 0x9CA467F3, 0x0B7E5394, 0x7A583F35, 0xE9322AD6, + 0x580C1677, 0xC6E60218, 0x35BFEDB9, 0xA499D95A, 0x1373C4FB, 0x824DB09C, + 0xF1279C3D, 0x600187DE, 0xCEDB737F, 0x3DB55F20, 0xAC8F4AC1, 0x1B693662, + 0x8A432203, 0xF91D0DA4, 0x67F6F945, 0xD6D0E4E6, 0x45AAD087, 0xB484BC28, + 0x235EA7C9, 0x9238936A, 0x01127F0B, 0x6FEC6AAC, 0xDEC6564D, 0x4DA041EE, + 0xBC7A2D8F, 0x2B541930, 0x9A2E04D1, 0x0907F072, 0x77E1DC13, 0xE6BBC7B4, + 0x5595B355, 0xC46F9EF6, 0x33498A97, 0xA2237638, 0x10FD61D9, 
0x7FD74D7A, + 0xEEB1391B, 0x5D8B24BC, 0xCC65105D, 0x3B3EFBFE, 0xAA18E79F, 0x18F2D340, + 0x87CCBEE1, 0xF6A6AA82, 0x65809623, 0xD45A81C4, 0x43346D65, 0xB20E5906, + 0x20E844A7, 0x8FC23048, 0xFE9C1BE9, 0x6D76078A, 0xDC4FF32B, 0x4B29DECC, + 0xBA03CA6D, 0x28DDB60E, 0x97B7A1AF, 0x06918D50, 0x756B78F1, 0xE4456492, + 0x531F5033, 0xC1F93BD4, 0x30D32775, 0x9FAD1316, 0x0E86FEB7, 0x7D60EA58, + 0xEC3AD5F9, 0x5B14C19A, 0xC9EEAD3B, 0x38C898DC, 0xA7A2847D, 0x167C701E, + 0x85565BBF, 0xF4304760, 0x630A3301, 0xD1E41EA2, 0x40BE0A43, 0xAF97F5E4, + 0x1E71E185, 0x8D4BCD26, 0xFC25B8C7, 0x6AFFA468, 0xD9D99009, 0x48B37BAA, + 0xB78D674B, 0x266752EC, 0x95413E8D, 0x041B2A2E, 0x72F515CF, 0xE1CF0170, + 0x50A8ED11, 0xBF82D8B2, 0x2E5CC453, 0x9D36AFF4, 0x0C109B95, 0x7AEA8736, + 0xE9C472D7, 0x589E5E78, 0xC7784A19, 0x365235BA, 0xA52C215B, 0x14060CFC, + 0x82DFF89D, 0xF1B9E43E, 0x6093CFDF, 0xCF6DBB80, 0x3E47A721, 0xAD2192C2, + 0x1BFB7E63, 0x8AD56A04, 0xF9AF55A5, 0x68894146, 0xD7632CE7, 0x463D1888, + 0xB5170429, 0x23F0EFCA, 0x92CADB6B, 0x01A4C70C, 0x707EB2AD, 0xDF589E4E, + 0x4E3289EF, 0xBD0C7590, 0x2BE66131, 0x9AC04CD2, 0x099A3873, 0x78742414, + 0xE74E0FB5, 0x5627FB56, 0xC501E6F7, 0x33DBD298, 0xA2B5BE39, 0x118FA9DA, + 0x8069957B, 0xEF43811C, 0x5E1D6CBD, 0xCCF7585E, 0x3BD143FF, 0xAAAB2FA0, + 0x19851B41, 0x885F06E2, 0xF738F283, 0x6612DE24, 0xD4ECC9C5, 0x43C6B566, + 0xB2A0A107, 0x217A8CA8, 0x90547849, 0xFF2E63EA, 0x6E084F8B, 0xDCE23B2C, + 0x4BBC26CD, 0xBA96126E, 0x296FFE0F, 0x9849E9B0, 0x0723D551, 0x75FDC0F2, + 0xE4D7AC93, 0x53B19834, 0xC28B83D5, 0x31656F76, 0xA03F5B17, 0x0F1946B8 }; + + static const uint8_t KEY_ROT[32] = { + 0x13, 0x04, 0x15, 0x06, 0x17, 0x08, 0x19, 0x0A, 0x1B, 0x0C, + 0x1D, 0x0E, 0x1F, 0x10, 0x01, 0x12, 0x03, 0x14, 0x05, 0x16, + 0x07, 0x18, 0x09, 0x1A, 0x0B, 0x1C, 0x0D, 0x1E, 0x0F, 0x00, + 0x11, 0x02 }; + + m_MK.resize(48); + m_RK.resize(48); + + secure_vector<uint32_t> K(8); + for(size_t i = 0; i != length; ++i) + K[i/4] = (K[i/4] << 8) + key[i]; + + uint32_t A = K[0], B = K[1], C = 
K[2], D = K[3], + E = K[4], F = K[5], G = K[6], H = K[7]; + + for(size_t i = 0; i != 48; i += 4) + { + round1(G, H, KEY_MASK[4*i+ 0], KEY_ROT[(4*i+ 0) % 32]); + round2(F, G, KEY_MASK[4*i+ 1], KEY_ROT[(4*i+ 1) % 32]); + round3(E, F, KEY_MASK[4*i+ 2], KEY_ROT[(4*i+ 2) % 32]); + round1(D, E, KEY_MASK[4*i+ 3], KEY_ROT[(4*i+ 3) % 32]); + round2(C, D, KEY_MASK[4*i+ 4], KEY_ROT[(4*i+ 4) % 32]); + round3(B, C, KEY_MASK[4*i+ 5], KEY_ROT[(4*i+ 5) % 32]); + round1(A, B, KEY_MASK[4*i+ 6], KEY_ROT[(4*i+ 6) % 32]); + round2(H, A, KEY_MASK[4*i+ 7], KEY_ROT[(4*i+ 7) % 32]); + round1(G, H, KEY_MASK[4*i+ 8], KEY_ROT[(4*i+ 8) % 32]); + round2(F, G, KEY_MASK[4*i+ 9], KEY_ROT[(4*i+ 9) % 32]); + round3(E, F, KEY_MASK[4*i+10], KEY_ROT[(4*i+10) % 32]); + round1(D, E, KEY_MASK[4*i+11], KEY_ROT[(4*i+11) % 32]); + round2(C, D, KEY_MASK[4*i+12], KEY_ROT[(4*i+12) % 32]); + round3(B, C, KEY_MASK[4*i+13], KEY_ROT[(4*i+13) % 32]); + round1(A, B, KEY_MASK[4*i+14], KEY_ROT[(4*i+14) % 32]); + round2(H, A, KEY_MASK[4*i+15], KEY_ROT[(4*i+15) % 32]); + + m_RK[i ] = (A % 32); + m_RK[i+1] = (C % 32); + m_RK[i+2] = (E % 32); + m_RK[i+3] = (G % 32); + m_MK[i ] = H; + m_MK[i+1] = F; + m_MK[i+2] = D; + m_MK[i+3] = B; + } + } + +void CAST_256::clear() + { + zap(m_MK); + zap(m_RK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/cast256/cast256.h b/comm/third_party/botan/src/lib/block/cast256/cast256.h new file mode 100644 index 0000000000..3c30169372 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast256/cast256.h @@ -0,0 +1,38 @@ +/* +* CAST-256 +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_CAST256_H_ +#define BOTAN_CAST256_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(cast256.h) + +namespace Botan { + +/** +* CAST-256 +*/ +class BOTAN_PUBLIC_API(2,0) CAST_256 final : public Block_Cipher_Fixed_Params<16, 4, 32, 4> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t 
blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "CAST-256"; } + BlockCipher* clone() const override { return new CAST_256; } + private: + void key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint32_t> m_MK; + secure_vector<uint8_t> m_RK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/cast256/info.txt b/comm/third_party/botan/src/lib/block/cast256/info.txt new file mode 100644 index 0000000000..b109fe0533 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/cast256/info.txt @@ -0,0 +1,7 @@ +<defines> +CAST_256 -> 20171203 +</defines> + +<requires> +cast128 +</requires> diff --git a/comm/third_party/botan/src/lib/block/des/des.cpp b/comm/third_party/botan/src/lib/block/des/des.cpp new file mode 100644 index 0000000000..0aa9e6a791 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/des/des.cpp @@ -0,0 +1,410 @@ +/* +* DES +* (C) 1999-2008,2018 Jack Lloyd +* +* Based on a public domain implemenation by Phil Karn (who in turn +* credited Richard Outerbridge and Jim Gillogly) +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/des.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +namespace { + +/* +* DES Key Schedule +*/ +void des_key_schedule(uint32_t round_key[32], const uint8_t key[8]) + { + static const uint8_t ROT[16] = { 1, 1, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 1 }; + + uint32_t C = ((key[7] & 0x80) << 20) | ((key[6] & 0x80) << 19) | + ((key[5] & 0x80) << 18) | ((key[4] & 0x80) << 17) | + ((key[3] & 0x80) << 16) | ((key[2] & 0x80) << 15) | + ((key[1] & 0x80) << 14) | ((key[0] & 0x80) << 13) | + ((key[7] & 0x40) << 13) | ((key[6] & 0x40) << 12) | + ((key[5] & 0x40) << 11) | ((key[4] & 0x40) << 10) | + ((key[3] & 0x40) << 9) | ((key[2] & 0x40) << 8) | + ((key[1] & 0x40) << 7) | ((key[0] & 0x40) << 6) | + 
((key[7] & 0x20) << 6) | ((key[6] & 0x20) << 5) | + ((key[5] & 0x20) << 4) | ((key[4] & 0x20) << 3) | + ((key[3] & 0x20) << 2) | ((key[2] & 0x20) << 1) | + ((key[1] & 0x20) ) | ((key[0] & 0x20) >> 1) | + ((key[7] & 0x10) >> 1) | ((key[6] & 0x10) >> 2) | + ((key[5] & 0x10) >> 3) | ((key[4] & 0x10) >> 4); + uint32_t D = ((key[7] & 0x02) << 26) | ((key[6] & 0x02) << 25) | + ((key[5] & 0x02) << 24) | ((key[4] & 0x02) << 23) | + ((key[3] & 0x02) << 22) | ((key[2] & 0x02) << 21) | + ((key[1] & 0x02) << 20) | ((key[0] & 0x02) << 19) | + ((key[7] & 0x04) << 17) | ((key[6] & 0x04) << 16) | + ((key[5] & 0x04) << 15) | ((key[4] & 0x04) << 14) | + ((key[3] & 0x04) << 13) | ((key[2] & 0x04) << 12) | + ((key[1] & 0x04) << 11) | ((key[0] & 0x04) << 10) | + ((key[7] & 0x08) << 8) | ((key[6] & 0x08) << 7) | + ((key[5] & 0x08) << 6) | ((key[4] & 0x08) << 5) | + ((key[3] & 0x08) << 4) | ((key[2] & 0x08) << 3) | + ((key[1] & 0x08) << 2) | ((key[0] & 0x08) << 1) | + ((key[3] & 0x10) >> 1) | ((key[2] & 0x10) >> 2) | + ((key[1] & 0x10) >> 3) | ((key[0] & 0x10) >> 4); + + for(size_t i = 0; i != 16; ++i) + { + C = ((C << ROT[i]) | (C >> (28-ROT[i]))) & 0x0FFFFFFF; + D = ((D << ROT[i]) | (D >> (28-ROT[i]))) & 0x0FFFFFFF; + round_key[2*i ] = ((C & 0x00000010) << 22) | ((C & 0x00000800) << 17) | + ((C & 0x00000020) << 16) | ((C & 0x00004004) << 15) | + ((C & 0x00000200) << 11) | ((C & 0x00020000) << 10) | + ((C & 0x01000000) >> 6) | ((C & 0x00100000) >> 4) | + ((C & 0x00010000) << 3) | ((C & 0x08000000) >> 2) | + ((C & 0x00800000) << 1) | ((D & 0x00000010) << 8) | + ((D & 0x00000002) << 7) | ((D & 0x00000001) << 2) | + ((D & 0x00000200) ) | ((D & 0x00008000) >> 2) | + ((D & 0x00000088) >> 3) | ((D & 0x00001000) >> 7) | + ((D & 0x00080000) >> 9) | ((D & 0x02020000) >> 14) | + ((D & 0x00400000) >> 21); + round_key[2*i+1] = ((C & 0x00000001) << 28) | ((C & 0x00000082) << 18) | + ((C & 0x00002000) << 14) | ((C & 0x00000100) << 10) | + ((C & 0x00001000) << 9) | ((C & 0x00040000) << 6) | + ((C & 
0x02400000) << 4) | ((C & 0x00008000) << 2) | + ((C & 0x00200000) >> 1) | ((C & 0x04000000) >> 10) | + ((D & 0x00000020) << 6) | ((D & 0x00000100) ) | + ((D & 0x00000800) >> 1) | ((D & 0x00000040) >> 3) | + ((D & 0x00010000) >> 4) | ((D & 0x00000400) >> 5) | + ((D & 0x00004000) >> 10) | ((D & 0x04000000) >> 13) | + ((D & 0x00800000) >> 14) | ((D & 0x00100000) >> 18) | + ((D & 0x01000000) >> 24) | ((D & 0x08000000) >> 26); + } + } + +inline uint32_t spbox(uint32_t T0, uint32_t T1) + { + return DES_SPBOX1[get_byte(0, T0)] ^ DES_SPBOX2[get_byte(0, T1)] ^ + DES_SPBOX3[get_byte(1, T0)] ^ DES_SPBOX4[get_byte(1, T1)] ^ + DES_SPBOX5[get_byte(2, T0)] ^ DES_SPBOX6[get_byte(2, T1)] ^ + DES_SPBOX7[get_byte(3, T0)] ^ DES_SPBOX8[get_byte(3, T1)]; + } + +/* +* DES Encryption +*/ +inline void des_encrypt(uint32_t& Lr, uint32_t& Rr, + const uint32_t round_key[32]) + { + uint32_t L = Lr; + uint32_t R = Rr; + for(size_t i = 0; i != 16; i += 2) + { + L ^= spbox(rotr<4>(R) ^ round_key[2*i ], R ^ round_key[2*i+1]); + R ^= spbox(rotr<4>(L) ^ round_key[2*i+2], L ^ round_key[2*i+3]); + } + + Lr = L; + Rr = R; + } + +inline void des_encrypt_x2(uint32_t& L0r, uint32_t& R0r, + uint32_t& L1r, uint32_t& R1r, + const uint32_t round_key[32]) + { + uint32_t L0 = L0r; + uint32_t R0 = R0r; + uint32_t L1 = L1r; + uint32_t R1 = R1r; + + for(size_t i = 0; i != 16; i += 2) + { + L0 ^= spbox(rotr<4>(R0) ^ round_key[2*i ], R0 ^ round_key[2*i+1]); + L1 ^= spbox(rotr<4>(R1) ^ round_key[2*i ], R1 ^ round_key[2*i+1]); + + R0 ^= spbox(rotr<4>(L0) ^ round_key[2*i+2], L0 ^ round_key[2*i+3]); + R1 ^= spbox(rotr<4>(L1) ^ round_key[2*i+2], L1 ^ round_key[2*i+3]); + } + + L0r = L0; + R0r = R0; + L1r = L1; + R1r = R1; + } + +/* +* DES Decryption +*/ +inline void des_decrypt(uint32_t& Lr, uint32_t& Rr, + const uint32_t round_key[32]) + { + uint32_t L = Lr; + uint32_t R = Rr; + for(size_t i = 16; i != 0; i -= 2) + { + L ^= spbox(rotr<4>(R) ^ round_key[2*i - 2], R ^ round_key[2*i - 1]); + R ^= spbox(rotr<4>(L) ^ 
round_key[2*i - 4], L ^ round_key[2*i - 3]); + } + Lr = L; + Rr = R; + } + +inline void des_decrypt_x2(uint32_t& L0r, uint32_t& R0r, + uint32_t& L1r, uint32_t& R1r, + const uint32_t round_key[32]) + { + uint32_t L0 = L0r; + uint32_t R0 = R0r; + uint32_t L1 = L1r; + uint32_t R1 = R1r; + + for(size_t i = 16; i != 0; i -= 2) + { + L0 ^= spbox(rotr<4>(R0) ^ round_key[2*i - 2], R0 ^ round_key[2*i - 1]); + L1 ^= spbox(rotr<4>(R1) ^ round_key[2*i - 2], R1 ^ round_key[2*i - 1]); + + R0 ^= spbox(rotr<4>(L0) ^ round_key[2*i - 4], L0 ^ round_key[2*i - 3]); + R1 ^= spbox(rotr<4>(L1) ^ round_key[2*i - 4], L1 ^ round_key[2*i - 3]); + } + + L0r = L0; + R0r = R0; + L1r = L1; + R1r = R1; + } + +inline void des_IP(uint32_t& L, uint32_t& R, const uint8_t block[]) + { + // IP sequence by Wei Dai, taken from public domain Crypto++ + L = load_be<uint32_t>(block, 0); + R = load_be<uint32_t>(block, 1); + + uint32_t T; + R = rotl<4>(R); + T = (L ^ R) & 0xF0F0F0F0; + L ^= T; + R = rotr<20>(R ^ T); + T = (L ^ R) & 0xFFFF0000; + L ^= T; + R = rotr<18>(R ^ T); + T = (L ^ R) & 0x33333333; + L ^= T; + R = rotr<6>(R ^ T); + T = (L ^ R) & 0x00FF00FF; + L ^= T; + R = rotl<9>(R ^ T); + T = (L ^ R) & 0xAAAAAAAA; + L = rotl<1>(L ^ T); + R ^= T; + } + +inline void des_FP(uint32_t L, uint32_t R, uint8_t out[]) + { + // FP sequence by Wei Dai, taken from public domain Crypto++ + uint32_t T; + + R = rotr<1>(R); + T = (L ^ R) & 0xAAAAAAAA; + R ^= T; + L = rotr<9>(L ^ T); + T = (L ^ R) & 0x00FF00FF; + R ^= T; + L = rotl<6>(L ^ T); + T = (L ^ R) & 0x33333333; + R ^= T; + L = rotl<18>(L ^ T); + T = (L ^ R) & 0xFFFF0000; + R ^= T; + L = rotl<20>(L ^ T); + T = (L ^ R) & 0xF0F0F0F0; + R ^= T; + L = rotr<4>(L ^ T); + + store_be(out, R, L); + } + +} + +/* +* DES Encryption +*/ +void DES::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + + while(blocks >= 2) + { + uint32_t L0, R0; + uint32_t L1, R1; + + des_IP(L0, R0, in); + des_IP(L1, R1, in + 
BLOCK_SIZE); + + des_encrypt_x2(L0, R0, L1, R1, m_round_key.data()); + + des_FP(L0, R0, out); + des_FP(L1, R1, out + BLOCK_SIZE); + + in += 2*BLOCK_SIZE; + out += 2*BLOCK_SIZE; + blocks -= 2; + } + + for(size_t i = 0; i < blocks; ++i) + { + uint32_t L, R; + des_IP(L, R, in + BLOCK_SIZE*i); + des_encrypt(L, R, m_round_key.data()); + des_FP(L, R, out + BLOCK_SIZE*i); + } + } + +/* +* DES Decryption +*/ +void DES::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + + while(blocks >= 2) + { + uint32_t L0, R0; + uint32_t L1, R1; + + des_IP(L0, R0, in); + des_IP(L1, R1, in + BLOCK_SIZE); + + des_decrypt_x2(L0, R0, L1, R1, m_round_key.data()); + + des_FP(L0, R0, out); + des_FP(L1, R1, out + BLOCK_SIZE); + + in += 2*BLOCK_SIZE; + out += 2*BLOCK_SIZE; + blocks -= 2; + } + + for(size_t i = 0; i < blocks; ++i) + { + uint32_t L, R; + des_IP(L, R, in + BLOCK_SIZE*i); + des_decrypt(L, R, m_round_key.data()); + des_FP(L, R, out + BLOCK_SIZE*i); + } + } + +/* +* DES Key Schedule +*/ +void DES::key_schedule(const uint8_t key[], size_t) + { + m_round_key.resize(32); + des_key_schedule(m_round_key.data(), key); + } + +void DES::clear() + { + zap(m_round_key); + } + +/* +* TripleDES Encryption +*/ +void TripleDES::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + + while(blocks >= 2) + { + uint32_t L0, R0; + uint32_t L1, R1; + + des_IP(L0, R0, in); + des_IP(L1, R1, in + BLOCK_SIZE); + + des_encrypt_x2(L0, R0, L1, R1, &m_round_key[0]); + des_decrypt_x2(R0, L0, R1, L1, &m_round_key[32]); + des_encrypt_x2(L0, R0, L1, R1, &m_round_key[64]); + + des_FP(L0, R0, out); + des_FP(L1, R1, out + BLOCK_SIZE); + + in += 2*BLOCK_SIZE; + out += 2*BLOCK_SIZE; + blocks -= 2; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t L, R; + des_IP(L, R, in + BLOCK_SIZE*i); + + des_encrypt(L, R, &m_round_key[0]); + des_decrypt(R, L, &m_round_key[32]); + des_encrypt(L, 
R, &m_round_key[64]); + + des_FP(L, R, out + BLOCK_SIZE*i); + } + } + +/* +* TripleDES Decryption +*/ +void TripleDES::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + + while(blocks >= 2) + { + uint32_t L0, R0; + uint32_t L1, R1; + + des_IP(L0, R0, in); + des_IP(L1, R1, in + BLOCK_SIZE); + + des_decrypt_x2(L0, R0, L1, R1, &m_round_key[64]); + des_encrypt_x2(R0, L0, R1, L1, &m_round_key[32]); + des_decrypt_x2(L0, R0, L1, R1, &m_round_key[0]); + + des_FP(L0, R0, out); + des_FP(L1, R1, out + BLOCK_SIZE); + + in += 2*BLOCK_SIZE; + out += 2*BLOCK_SIZE; + blocks -= 2; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t L, R; + des_IP(L, R, in + BLOCK_SIZE*i); + + des_decrypt(L, R, &m_round_key[64]); + des_encrypt(R, L, &m_round_key[32]); + des_decrypt(L, R, &m_round_key[0]); + + des_FP(L, R, out + BLOCK_SIZE*i); + } + } + +/* +* TripleDES Key Schedule +*/ +void TripleDES::key_schedule(const uint8_t key[], size_t length) + { + m_round_key.resize(3*32); + des_key_schedule(&m_round_key[0], key); + des_key_schedule(&m_round_key[32], key + 8); + + if(length == 24) + des_key_schedule(&m_round_key[64], key + 16); + else + copy_mem(&m_round_key[64], &m_round_key[0], 32); + } + +void TripleDES::clear() + { + zap(m_round_key); + } + +} diff --git a/comm/third_party/botan/src/lib/block/des/des.h b/comm/third_party/botan/src/lib/block/des/des.h new file mode 100644 index 0000000000..d8bbcfdd10 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/des/des.h @@ -0,0 +1,67 @@ +/* +* DES +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_DES_H_ +#define BOTAN_DES_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(des.h) + +namespace Botan { + +/** +* DES +*/ +class BOTAN_PUBLIC_API(2,0) DES final : public Block_Cipher_Fixed_Params<8, 8> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) 
const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "DES"; } + BlockCipher* clone() const override { return new DES; } + private: + void key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint32_t> m_round_key; + }; + +/** +* Triple DES +*/ +class BOTAN_PUBLIC_API(2,0) TripleDES final : public Block_Cipher_Fixed_Params<8, 16, 24, 8> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "TripleDES"; } + BlockCipher* clone() const override { return new TripleDES; } + private: + void key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint32_t> m_round_key; + }; + +/* +* DES Tables +*/ +extern const uint32_t DES_SPBOX1[256]; +extern const uint32_t DES_SPBOX2[256]; +extern const uint32_t DES_SPBOX3[256]; +extern const uint32_t DES_SPBOX4[256]; +extern const uint32_t DES_SPBOX5[256]; +extern const uint32_t DES_SPBOX6[256]; +extern const uint32_t DES_SPBOX7[256]; +extern const uint32_t DES_SPBOX8[256]; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/des/des_tab.cpp b/comm/third_party/botan/src/lib/block/des/des_tab.cpp new file mode 100644 index 0000000000..cb6ab4e7e9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/des/des_tab.cpp @@ -0,0 +1,372 @@ +/* +* Substitution/Permutation Tables for DES +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/des.h> + +namespace Botan { + +alignas(64) const uint32_t DES_SPBOX1[256] = { + 0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, + 0x00000004, 0x00010000, 0x00000400, 0x01010400, 0x01010404, 0x00000400, + 0x01000404, 0x01010004, 0x01000000, 0x00000004, 0x00000404, 
0x01000400, + 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404, + 0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, + 0x00010404, 0x01000000, 0x00010000, 0x01010404, 0x00000004, 0x01010000, + 0x01010400, 0x01000000, 0x01000000, 0x00000400, 0x01010004, 0x00010000, + 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404, + 0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, + 0x00010404, 0x01010400, 0x00000404, 0x01000400, 0x01000400, 0x00000000, + 0x00010004, 0x00010400, 0x00000000, 0x01010004, 0x01010400, 0x00000000, + 0x00010000, 0x01010404, 0x01010004, 0x00010404, 0x00000004, 0x00010000, + 0x00000400, 0x01010400, 0x01010404, 0x00000400, 0x01000404, 0x01010004, + 0x01000000, 0x00000004, 0x00000404, 0x01000400, 0x01000400, 0x00010400, + 0x00010400, 0x01010000, 0x01010000, 0x01000404, 0x00010004, 0x01000004, + 0x01000004, 0x00010004, 0x00000000, 0x00000404, 0x00010404, 0x01000000, + 0x00010000, 0x01010404, 0x00000004, 0x01010000, 0x01010400, 0x01000000, + 0x01000000, 0x00000400, 0x01010004, 0x00010000, 0x00010400, 0x01000004, + 0x00000400, 0x00000004, 0x01000404, 0x00010404, 0x01010404, 0x00010004, + 0x01010000, 0x01000404, 0x01000004, 0x00000404, 0x00010404, 0x01010400, + 0x00000404, 0x01000400, 0x01000400, 0x00000000, 0x00010004, 0x00010400, + 0x00000000, 0x01010004, 0x01010400, 0x00000000, 0x00010000, 0x01010404, + 0x01010004, 0x00010404, 0x00000004, 0x00010000, 0x00000400, 0x01010400, + 0x01010404, 0x00000400, 0x01000404, 0x01010004, 0x01000000, 0x00000004, + 0x00000404, 0x01000400, 0x01000400, 0x00010400, 0x00010400, 0x01010000, + 0x01010000, 0x01000404, 0x00010004, 0x01000004, 0x01000004, 0x00010004, + 0x00000000, 0x00000404, 0x00010404, 0x01000000, 0x00010000, 0x01010404, + 0x00000004, 0x01010000, 0x01010400, 0x01000000, 0x01000000, 0x00000400, + 0x01010004, 0x00010000, 0x00010400, 0x01000004, 0x00000400, 0x00000004, + 0x01000404, 0x00010404, 0x01010404, 0x00010004, 0x01010000, 
0x01000404, + 0x01000004, 0x00000404, 0x00010404, 0x01010400, 0x00000404, 0x01000400, + 0x01000400, 0x00000000, 0x00010004, 0x00010400, 0x00000000, 0x01010004, + 0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, + 0x00000004, 0x00010000, 0x00000400, 0x01010400, 0x01010404, 0x00000400, + 0x01000404, 0x01010004, 0x01000000, 0x00000004, 0x00000404, 0x01000400, + 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404, + 0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, + 0x00010404, 0x01000000, 0x00010000, 0x01010404, 0x00000004, 0x01010000, + 0x01010400, 0x01000000, 0x01000000, 0x00000400, 0x01010004, 0x00010000, + 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404, + 0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, + 0x00010404, 0x01010400, 0x00000404, 0x01000400, 0x01000400, 0x00000000, + 0x00010004, 0x00010400, 0x00000000, 0x01010004 }; + +alignas(64) const uint32_t DES_SPBOX2[256] = { + 0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, + 0x80100020, 0x80008020, 0x80000020, 0x80108020, 0x80108000, 0x80000000, + 0x80008000, 0x00100000, 0x00000020, 0x80100020, 0x00108000, 0x00100020, + 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000, + 0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, + 0x80100000, 0x00008020, 0x00000000, 0x00108020, 0x80100020, 0x00100000, + 0x80008020, 0x80100000, 0x80108000, 0x00008000, 0x80100000, 0x80008000, + 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000, + 0x00008020, 0x80108000, 0x00100000, 0x80000020, 0x00100020, 0x80008020, + 0x80000020, 0x00100020, 0x00108000, 0x00000000, 0x80008000, 0x00008020, + 0x80000000, 0x80100020, 0x80108020, 0x00108000, 0x80108020, 0x80008000, + 0x00008000, 0x00108020, 0x00100000, 0x00000020, 0x80100020, 0x80008020, + 0x80000020, 0x80108020, 0x80108000, 0x80000000, 0x80008000, 0x00100000, + 0x00000020, 0x80100020, 
0x00108000, 0x00100020, 0x80008020, 0x00000000, + 0x80000000, 0x00008000, 0x00108020, 0x80100000, 0x00100020, 0x80000020, + 0x00000000, 0x00108000, 0x00008020, 0x80108000, 0x80100000, 0x00008020, + 0x00000000, 0x00108020, 0x80100020, 0x00100000, 0x80008020, 0x80100000, + 0x80108000, 0x00008000, 0x80100000, 0x80008000, 0x00000020, 0x80108020, + 0x00108020, 0x00000020, 0x00008000, 0x80000000, 0x00008020, 0x80108000, + 0x00100000, 0x80000020, 0x00100020, 0x80008020, 0x80000020, 0x00100020, + 0x00108000, 0x00000000, 0x80008000, 0x00008020, 0x80000000, 0x80100020, + 0x80108020, 0x00108000, 0x80108020, 0x80008000, 0x00008000, 0x00108020, + 0x00100000, 0x00000020, 0x80100020, 0x80008020, 0x80000020, 0x80108020, + 0x80108000, 0x80000000, 0x80008000, 0x00100000, 0x00000020, 0x80100020, + 0x00108000, 0x00100020, 0x80008020, 0x00000000, 0x80000000, 0x00008000, + 0x00108020, 0x80100000, 0x00100020, 0x80000020, 0x00000000, 0x00108000, + 0x00008020, 0x80108000, 0x80100000, 0x00008020, 0x00000000, 0x00108020, + 0x80100020, 0x00100000, 0x80008020, 0x80100000, 0x80108000, 0x00008000, + 0x80100000, 0x80008000, 0x00000020, 0x80108020, 0x00108020, 0x00000020, + 0x00008000, 0x80000000, 0x00008020, 0x80108000, 0x00100000, 0x80000020, + 0x00100020, 0x80008020, 0x80000020, 0x00100020, 0x00108000, 0x00000000, + 0x80008000, 0x00008020, 0x80000000, 0x80100020, 0x80108020, 0x00108000, + 0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, + 0x80100020, 0x80008020, 0x80000020, 0x80108020, 0x80108000, 0x80000000, + 0x80008000, 0x00100000, 0x00000020, 0x80100020, 0x00108000, 0x00100020, + 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000, + 0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, + 0x80100000, 0x00008020, 0x00000000, 0x00108020, 0x80100020, 0x00100000, + 0x80008020, 0x80100000, 0x80108000, 0x00008000, 0x80100000, 0x80008000, + 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000, + 0x00008020, 0x80108000, 
0x00100000, 0x80000020, 0x00100020, 0x80008020, + 0x80000020, 0x00100020, 0x00108000, 0x00000000, 0x80008000, 0x00008020, + 0x80000000, 0x80100020, 0x80108020, 0x00108000 }; + +alignas(64) const uint32_t DES_SPBOX3[256] = { + 0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, + 0x00020208, 0x08000200, 0x00020008, 0x08000008, 0x08000008, 0x00020000, + 0x08020208, 0x00020008, 0x08020000, 0x00000208, 0x08000000, 0x00000008, + 0x08020200, 0x00000200, 0x00020200, 0x08020000, 0x08020008, 0x00020208, + 0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, + 0x00000200, 0x08000000, 0x08020200, 0x08000000, 0x00020008, 0x00000208, + 0x00020000, 0x08020200, 0x08000200, 0x00000000, 0x00000200, 0x00020008, + 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008, + 0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, + 0x00020200, 0x08000008, 0x08020000, 0x08000208, 0x00000208, 0x08020000, + 0x00020208, 0x00000008, 0x08020008, 0x00020200, 0x00000208, 0x08020200, + 0x00000000, 0x08020008, 0x08000200, 0x00000000, 0x00020208, 0x08000200, + 0x00020008, 0x08000008, 0x08000008, 0x00020000, 0x08020208, 0x00020008, + 0x08020000, 0x00000208, 0x08000000, 0x00000008, 0x08020200, 0x00000200, + 0x00020200, 0x08020000, 0x08020008, 0x00020208, 0x08000208, 0x00020200, + 0x00020000, 0x08000208, 0x00000008, 0x08020208, 0x00000200, 0x08000000, + 0x08020200, 0x08000000, 0x00020008, 0x00000208, 0x00020000, 0x08020200, + 0x08000200, 0x00000000, 0x00000200, 0x00020008, 0x08020208, 0x08000200, + 0x08000008, 0x00000200, 0x00000000, 0x08020008, 0x08000208, 0x00020000, + 0x08000000, 0x08020208, 0x00000008, 0x00020208, 0x00020200, 0x08000008, + 0x08020000, 0x08000208, 0x00000208, 0x08020000, 0x00020208, 0x00000008, + 0x08020008, 0x00020200, 0x00000208, 0x08020200, 0x00000000, 0x08020008, + 0x08000200, 0x00000000, 0x00020208, 0x08000200, 0x00020008, 0x08000008, + 0x08000008, 0x00020000, 0x08020208, 0x00020008, 0x08020000, 0x00000208, 
+ 0x08000000, 0x00000008, 0x08020200, 0x00000200, 0x00020200, 0x08020000, + 0x08020008, 0x00020208, 0x08000208, 0x00020200, 0x00020000, 0x08000208, + 0x00000008, 0x08020208, 0x00000200, 0x08000000, 0x08020200, 0x08000000, + 0x00020008, 0x00000208, 0x00020000, 0x08020200, 0x08000200, 0x00000000, + 0x00000200, 0x00020008, 0x08020208, 0x08000200, 0x08000008, 0x00000200, + 0x00000000, 0x08020008, 0x08000208, 0x00020000, 0x08000000, 0x08020208, + 0x00000008, 0x00020208, 0x00020200, 0x08000008, 0x08020000, 0x08000208, + 0x00000208, 0x08020000, 0x00020208, 0x00000008, 0x08020008, 0x00020200, + 0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, + 0x00020208, 0x08000200, 0x00020008, 0x08000008, 0x08000008, 0x00020000, + 0x08020208, 0x00020008, 0x08020000, 0x00000208, 0x08000000, 0x00000008, + 0x08020200, 0x00000200, 0x00020200, 0x08020000, 0x08020008, 0x00020208, + 0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, + 0x00000200, 0x08000000, 0x08020200, 0x08000000, 0x00020008, 0x00000208, + 0x00020000, 0x08020200, 0x08000200, 0x00000000, 0x00000200, 0x00020008, + 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008, + 0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, + 0x00020200, 0x08000008, 0x08020000, 0x08000208, 0x00000208, 0x08020000, + 0x00020208, 0x00000008, 0x08020008, 0x00020200 }; + +alignas(64) const uint32_t DES_SPBOX4[256] = { + 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, + 0x00800001, 0x00002001, 0x00000000, 0x00802000, 0x00802000, 0x00802081, + 0x00000081, 0x00000000, 0x00800080, 0x00800001, 0x00000001, 0x00002000, + 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080, + 0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, + 0x00802081, 0x00000081, 0x00800080, 0x00800001, 0x00802000, 0x00802081, + 0x00000081, 0x00000000, 0x00000000, 0x00802000, 0x00002080, 0x00800080, + 0x00800081, 0x00000001, 0x00802001, 
0x00002081, 0x00002081, 0x00000080, + 0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, + 0x00802080, 0x00800081, 0x00002001, 0x00002080, 0x00800000, 0x00802001, + 0x00000080, 0x00800000, 0x00002000, 0x00802080, 0x00802001, 0x00002081, + 0x00002081, 0x00000080, 0x00802080, 0x00800081, 0x00800001, 0x00002001, + 0x00000000, 0x00802000, 0x00802000, 0x00802081, 0x00000081, 0x00000000, + 0x00800080, 0x00800001, 0x00000001, 0x00002000, 0x00800000, 0x00802001, + 0x00000080, 0x00800000, 0x00002001, 0x00002080, 0x00800081, 0x00000001, + 0x00002080, 0x00800080, 0x00002000, 0x00802080, 0x00802081, 0x00000081, + 0x00800080, 0x00800001, 0x00802000, 0x00802081, 0x00000081, 0x00000000, + 0x00000000, 0x00802000, 0x00002080, 0x00800080, 0x00800081, 0x00000001, + 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802081, 0x00000081, + 0x00000001, 0x00002000, 0x00800001, 0x00002001, 0x00802080, 0x00800081, + 0x00002001, 0x00002080, 0x00800000, 0x00802001, 0x00000080, 0x00800000, + 0x00002000, 0x00802080, 0x00802001, 0x00002081, 0x00002081, 0x00000080, + 0x00802080, 0x00800081, 0x00800001, 0x00002001, 0x00000000, 0x00802000, + 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00800080, 0x00800001, + 0x00000001, 0x00002000, 0x00800000, 0x00802001, 0x00000080, 0x00800000, + 0x00002001, 0x00002080, 0x00800081, 0x00000001, 0x00002080, 0x00800080, + 0x00002000, 0x00802080, 0x00802081, 0x00000081, 0x00800080, 0x00800001, + 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00000000, 0x00802000, + 0x00002080, 0x00800080, 0x00800081, 0x00000001, 0x00802001, 0x00002081, + 0x00002081, 0x00000080, 0x00802081, 0x00000081, 0x00000001, 0x00002000, + 0x00800001, 0x00002001, 0x00802080, 0x00800081, 0x00002001, 0x00002080, + 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002000, 0x00802080, + 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, + 0x00800001, 0x00002001, 0x00000000, 0x00802000, 0x00802000, 0x00802081, + 0x00000081, 0x00000000, 0x00800080, 
0x00800001, 0x00000001, 0x00002000, + 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080, + 0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, + 0x00802081, 0x00000081, 0x00800080, 0x00800001, 0x00802000, 0x00802081, + 0x00000081, 0x00000000, 0x00000000, 0x00802000, 0x00002080, 0x00800080, + 0x00800081, 0x00000001, 0x00802001, 0x00002081, 0x00002081, 0x00000080, + 0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, + 0x00802080, 0x00800081, 0x00002001, 0x00002080, 0x00800000, 0x00802001, + 0x00000080, 0x00800000, 0x00002000, 0x00802080 }; + +alignas(64) const uint32_t DES_SPBOX5[256] = { + 0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, + 0x40000000, 0x02080000, 0x40080100, 0x00080000, 0x02000100, 0x40080100, + 0x42000100, 0x42080000, 0x00080100, 0x40000000, 0x02000000, 0x40080000, + 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100, + 0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, + 0x42000000, 0x00080100, 0x00080000, 0x42000100, 0x00000100, 0x02000000, + 0x40000000, 0x02080000, 0x42000100, 0x40080100, 0x02000100, 0x40000000, + 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000, + 0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, + 0x40080000, 0x42000000, 0x00080100, 0x02000100, 0x40000100, 0x00080000, + 0x00000000, 0x40080000, 0x02080100, 0x40000100, 0x00000100, 0x02080100, + 0x02080000, 0x42000100, 0x00080000, 0x00000100, 0x40000000, 0x02080000, + 0x40080100, 0x00080000, 0x02000100, 0x40080100, 0x42000100, 0x42080000, + 0x00080100, 0x40000000, 0x02000000, 0x40080000, 0x40080000, 0x00000000, + 0x40000100, 0x42080100, 0x42080100, 0x02000100, 0x42080000, 0x40000100, + 0x00000000, 0x42000000, 0x02080100, 0x02000000, 0x42000000, 0x00080100, + 0x00080000, 0x42000100, 0x00000100, 0x02000000, 0x40000000, 0x02080000, + 0x42000100, 0x40080100, 0x02000100, 0x40000000, 0x42080000, 0x02080100, + 
0x40080100, 0x00000100, 0x02000000, 0x42080000, 0x42080100, 0x00080100, + 0x42000000, 0x42080100, 0x02080000, 0x00000000, 0x40080000, 0x42000000, + 0x00080100, 0x02000100, 0x40000100, 0x00080000, 0x00000000, 0x40080000, + 0x02080100, 0x40000100, 0x00000100, 0x02080100, 0x02080000, 0x42000100, + 0x00080000, 0x00000100, 0x40000000, 0x02080000, 0x40080100, 0x00080000, + 0x02000100, 0x40080100, 0x42000100, 0x42080000, 0x00080100, 0x40000000, + 0x02000000, 0x40080000, 0x40080000, 0x00000000, 0x40000100, 0x42080100, + 0x42080100, 0x02000100, 0x42080000, 0x40000100, 0x00000000, 0x42000000, + 0x02080100, 0x02000000, 0x42000000, 0x00080100, 0x00080000, 0x42000100, + 0x00000100, 0x02000000, 0x40000000, 0x02080000, 0x42000100, 0x40080100, + 0x02000100, 0x40000000, 0x42080000, 0x02080100, 0x40080100, 0x00000100, + 0x02000000, 0x42080000, 0x42080100, 0x00080100, 0x42000000, 0x42080100, + 0x02080000, 0x00000000, 0x40080000, 0x42000000, 0x00080100, 0x02000100, + 0x40000100, 0x00080000, 0x00000000, 0x40080000, 0x02080100, 0x40000100, + 0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, + 0x40000000, 0x02080000, 0x40080100, 0x00080000, 0x02000100, 0x40080100, + 0x42000100, 0x42080000, 0x00080100, 0x40000000, 0x02000000, 0x40080000, + 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100, + 0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, + 0x42000000, 0x00080100, 0x00080000, 0x42000100, 0x00000100, 0x02000000, + 0x40000000, 0x02080000, 0x42000100, 0x40080100, 0x02000100, 0x40000000, + 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000, + 0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, + 0x40080000, 0x42000000, 0x00080100, 0x02000100, 0x40000100, 0x00080000, + 0x00000000, 0x40080000, 0x02080100, 0x40000100 }; + +alignas(64) const uint32_t DES_SPBOX6[256] = { + 0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, + 0x20404010, 0x00400000, 0x20004000, 0x00404010, 
0x00400000, 0x20000010, + 0x00400010, 0x20004000, 0x20000000, 0x00004010, 0x00000000, 0x00400010, + 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010, + 0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, + 0x20404000, 0x20000000, 0x20004000, 0x00000010, 0x20400010, 0x00404000, + 0x20404010, 0x00400000, 0x00004010, 0x20000010, 0x00400000, 0x20004000, + 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000, + 0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, + 0x20400000, 0x00404010, 0x00004000, 0x00400010, 0x20004010, 0x00000000, + 0x20404000, 0x20000000, 0x00400010, 0x20004010, 0x20000010, 0x20400000, + 0x00004000, 0x20404010, 0x20400000, 0x00000010, 0x20404010, 0x00400000, + 0x20004000, 0x00404010, 0x00400000, 0x20000010, 0x00400010, 0x20004000, + 0x20000000, 0x00004010, 0x00000000, 0x00400010, 0x20004010, 0x00004000, + 0x00404000, 0x20004010, 0x00000010, 0x20400010, 0x20400010, 0x00000000, + 0x00404010, 0x20404000, 0x00004010, 0x00404000, 0x20404000, 0x20000000, + 0x20004000, 0x00000010, 0x20400010, 0x00404000, 0x20404010, 0x00400000, + 0x00004010, 0x20000010, 0x00400000, 0x20004000, 0x20000000, 0x00004010, + 0x20000010, 0x20404010, 0x00404000, 0x20400000, 0x00404010, 0x20404000, + 0x00000000, 0x20400010, 0x00000010, 0x00004000, 0x20400000, 0x00404010, + 0x00004000, 0x00400010, 0x20004010, 0x00000000, 0x20404000, 0x20000000, + 0x00400010, 0x20004010, 0x20000010, 0x20400000, 0x00004000, 0x20404010, + 0x20400000, 0x00000010, 0x20404010, 0x00400000, 0x20004000, 0x00404010, + 0x00400000, 0x20000010, 0x00400010, 0x20004000, 0x20000000, 0x00004010, + 0x00000000, 0x00400010, 0x20004010, 0x00004000, 0x00404000, 0x20004010, + 0x00000010, 0x20400010, 0x20400010, 0x00000000, 0x00404010, 0x20404000, + 0x00004010, 0x00404000, 0x20404000, 0x20000000, 0x20004000, 0x00000010, + 0x20400010, 0x00404000, 0x20404010, 0x00400000, 0x00004010, 0x20000010, + 0x00400000, 0x20004000, 0x20000000, 0x00004010, 
0x20000010, 0x20404010, + 0x00404000, 0x20400000, 0x00404010, 0x20404000, 0x00000000, 0x20400010, + 0x00000010, 0x00004000, 0x20400000, 0x00404010, 0x00004000, 0x00400010, + 0x20004010, 0x00000000, 0x20404000, 0x20000000, 0x00400010, 0x20004010, + 0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, + 0x20404010, 0x00400000, 0x20004000, 0x00404010, 0x00400000, 0x20000010, + 0x00400010, 0x20004000, 0x20000000, 0x00004010, 0x00000000, 0x00400010, + 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010, + 0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, + 0x20404000, 0x20000000, 0x20004000, 0x00000010, 0x20400010, 0x00404000, + 0x20404010, 0x00400000, 0x00004010, 0x20000010, 0x00400000, 0x20004000, + 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000, + 0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, + 0x20400000, 0x00404010, 0x00004000, 0x00400010, 0x20004010, 0x00000000, + 0x20404000, 0x20000000, 0x00400010, 0x20004010 }; + +alignas(64) const uint32_t DES_SPBOX7[256] = { + 0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, + 0x00200802, 0x04200800, 0x04200802, 0x00200000, 0x00000000, 0x04000002, + 0x00000002, 0x04000000, 0x04200002, 0x00000802, 0x04000800, 0x00200802, + 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002, + 0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, + 0x04000000, 0x00200800, 0x04000000, 0x00200800, 0x00200000, 0x04000802, + 0x04000802, 0x04200002, 0x04200002, 0x00000002, 0x00200002, 0x04000000, + 0x04000800, 0x00200000, 0x04200800, 0x00000802, 0x00200802, 0x04200800, + 0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, + 0x00000002, 0x04200802, 0x00000000, 0x00200802, 0x04200000, 0x00000800, + 0x04000002, 0x04000800, 0x00000800, 0x00200002, 0x00200000, 0x04200002, + 0x04000802, 0x00000000, 0x00000800, 0x04000802, 0x00200802, 0x04200800, + 0x04200802, 
0x00200000, 0x00000000, 0x04000002, 0x00000002, 0x04000000, + 0x04200002, 0x00000802, 0x04000800, 0x00200802, 0x00200002, 0x04000800, + 0x04000002, 0x04200000, 0x04200800, 0x00200002, 0x04200000, 0x00000800, + 0x00000802, 0x04200802, 0x00200800, 0x00000002, 0x04000000, 0x00200800, + 0x04000000, 0x00200800, 0x00200000, 0x04000802, 0x04000802, 0x04200002, + 0x04200002, 0x00000002, 0x00200002, 0x04000000, 0x04000800, 0x00200000, + 0x04200800, 0x00000802, 0x00200802, 0x04200800, 0x00000802, 0x04000002, + 0x04200802, 0x04200000, 0x00200800, 0x00000000, 0x00000002, 0x04200802, + 0x00000000, 0x00200802, 0x04200000, 0x00000800, 0x04000002, 0x04000800, + 0x00000800, 0x00200002, 0x00200000, 0x04200002, 0x04000802, 0x00000000, + 0x00000800, 0x04000802, 0x00200802, 0x04200800, 0x04200802, 0x00200000, + 0x00000000, 0x04000002, 0x00000002, 0x04000000, 0x04200002, 0x00000802, + 0x04000800, 0x00200802, 0x00200002, 0x04000800, 0x04000002, 0x04200000, + 0x04200800, 0x00200002, 0x04200000, 0x00000800, 0x00000802, 0x04200802, + 0x00200800, 0x00000002, 0x04000000, 0x00200800, 0x04000000, 0x00200800, + 0x00200000, 0x04000802, 0x04000802, 0x04200002, 0x04200002, 0x00000002, + 0x00200002, 0x04000000, 0x04000800, 0x00200000, 0x04200800, 0x00000802, + 0x00200802, 0x04200800, 0x00000802, 0x04000002, 0x04200802, 0x04200000, + 0x00200800, 0x00000000, 0x00000002, 0x04200802, 0x00000000, 0x00200802, + 0x04200000, 0x00000800, 0x04000002, 0x04000800, 0x00000800, 0x00200002, + 0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, + 0x00200802, 0x04200800, 0x04200802, 0x00200000, 0x00000000, 0x04000002, + 0x00000002, 0x04000000, 0x04200002, 0x00000802, 0x04000800, 0x00200802, + 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002, + 0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, + 0x04000000, 0x00200800, 0x04000000, 0x00200800, 0x00200000, 0x04000802, + 0x04000802, 0x04200002, 0x04200002, 0x00000002, 0x00200002, 0x04000000, + 0x04000800, 
0x00200000, 0x04200800, 0x00000802, 0x00200802, 0x04200800, + 0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, + 0x00000002, 0x04200802, 0x00000000, 0x00200802, 0x04200000, 0x00000800, + 0x04000002, 0x04000800, 0x00000800, 0x00200002 }; + +alignas(64) const uint32_t DES_SPBOX8[256] = { + 0x10001040, 0x00001000, 0x00040000, 0x10041040, 0x10000000, 0x10001040, + 0x00000040, 0x10000000, 0x00040040, 0x10040000, 0x10041040, 0x00041000, + 0x10041000, 0x00041040, 0x00001000, 0x00000040, 0x10040000, 0x10000040, + 0x10001000, 0x00001040, 0x00041000, 0x00040040, 0x10040040, 0x10041000, + 0x00001040, 0x00000000, 0x00000000, 0x10040040, 0x10000040, 0x10001000, + 0x00041040, 0x00040000, 0x00041040, 0x00040000, 0x10041000, 0x00001000, + 0x00000040, 0x10040040, 0x00001000, 0x00041040, 0x10001000, 0x00000040, + 0x10000040, 0x10040000, 0x10040040, 0x10000000, 0x00040000, 0x10001040, + 0x00000000, 0x10041040, 0x00040040, 0x10000040, 0x10040000, 0x10001000, + 0x10001040, 0x00000000, 0x10041040, 0x00041000, 0x00041000, 0x00001040, + 0x00001040, 0x00040040, 0x10000000, 0x10041000, 0x10001040, 0x00001000, + 0x00040000, 0x10041040, 0x10000000, 0x10001040, 0x00000040, 0x10000000, + 0x00040040, 0x10040000, 0x10041040, 0x00041000, 0x10041000, 0x00041040, + 0x00001000, 0x00000040, 0x10040000, 0x10000040, 0x10001000, 0x00001040, + 0x00041000, 0x00040040, 0x10040040, 0x10041000, 0x00001040, 0x00000000, + 0x00000000, 0x10040040, 0x10000040, 0x10001000, 0x00041040, 0x00040000, + 0x00041040, 0x00040000, 0x10041000, 0x00001000, 0x00000040, 0x10040040, + 0x00001000, 0x00041040, 0x10001000, 0x00000040, 0x10000040, 0x10040000, + 0x10040040, 0x10000000, 0x00040000, 0x10001040, 0x00000000, 0x10041040, + 0x00040040, 0x10000040, 0x10040000, 0x10001000, 0x10001040, 0x00000000, + 0x10041040, 0x00041000, 0x00041000, 0x00001040, 0x00001040, 0x00040040, + 0x10000000, 0x10041000, 0x10001040, 0x00001000, 0x00040000, 0x10041040, + 0x10000000, 0x10001040, 0x00000040, 0x10000000, 0x00040040, 
0x10040000, + 0x10041040, 0x00041000, 0x10041000, 0x00041040, 0x00001000, 0x00000040, + 0x10040000, 0x10000040, 0x10001000, 0x00001040, 0x00041000, 0x00040040, + 0x10040040, 0x10041000, 0x00001040, 0x00000000, 0x00000000, 0x10040040, + 0x10000040, 0x10001000, 0x00041040, 0x00040000, 0x00041040, 0x00040000, + 0x10041000, 0x00001000, 0x00000040, 0x10040040, 0x00001000, 0x00041040, + 0x10001000, 0x00000040, 0x10000040, 0x10040000, 0x10040040, 0x10000000, + 0x00040000, 0x10001040, 0x00000000, 0x10041040, 0x00040040, 0x10000040, + 0x10040000, 0x10001000, 0x10001040, 0x00000000, 0x10041040, 0x00041000, + 0x00041000, 0x00001040, 0x00001040, 0x00040040, 0x10000000, 0x10041000, + 0x10001040, 0x00001000, 0x00040000, 0x10041040, 0x10000000, 0x10001040, + 0x00000040, 0x10000000, 0x00040040, 0x10040000, 0x10041040, 0x00041000, + 0x10041000, 0x00041040, 0x00001000, 0x00000040, 0x10040000, 0x10000040, + 0x10001000, 0x00001040, 0x00041000, 0x00040040, 0x10040040, 0x10041000, + 0x00001040, 0x00000000, 0x00000000, 0x10040040, 0x10000040, 0x10001000, + 0x00041040, 0x00040000, 0x00041040, 0x00040000, 0x10041000, 0x00001000, + 0x00000040, 0x10040040, 0x00001000, 0x00041040, 0x10001000, 0x00000040, + 0x10000040, 0x10040000, 0x10040040, 0x10000000, 0x00040000, 0x10001040, + 0x00000000, 0x10041040, 0x00040040, 0x10000040, 0x10040000, 0x10001000, + 0x10001040, 0x00000000, 0x10041040, 0x00041000, 0x00041000, 0x00001040, + 0x00001040, 0x00040040, 0x10000000, 0x10041000 }; + +} diff --git a/comm/third_party/botan/src/lib/block/des/desx.cpp b/comm/third_party/botan/src/lib/block/des/desx.cpp new file mode 100644 index 0000000000..e869b3ebf8 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/des/desx.cpp @@ -0,0 +1,65 @@ +/* +* DES +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/desx.h> + +namespace Botan { + +/* +* DESX Encryption +*/ +void DESX::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const 
+ { + verify_key_set(m_K1.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + xor_buf(out, in, m_K1.data(), BLOCK_SIZE); + m_des.encrypt(out); + xor_buf(out, m_K2.data(), BLOCK_SIZE); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* DESX Decryption +* +* Mirrors encrypt_n with the key order reversed: for each block, xor_buf +* is called with m_K2 first, then m_des.decrypt runs on out, then xor_buf +* is called with m_K1. +*/ +void DESX::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_K1.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + xor_buf(out, in, m_K2.data(), BLOCK_SIZE); + m_des.decrypt(out); + xor_buf(out, m_K1.data(), BLOCK_SIZE); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* DESX Key Schedule +* +* Key bytes 0-7 are copied into m_K1, bytes 8-15 are passed to +* m_des.set_key, and bytes 16-23 are copied into m_K2. The length +* argument is not used here. +*/ +void DESX::key_schedule(const uint8_t key[], size_t) + { + m_K1.assign(key, key + 8); + m_des.set_key(key + 8, 8); + m_K2.assign(key + 16, key + 24); + } + +/* +* Clear the inner DES object and call zap() on m_K1 and m_K2 +*/ +void DESX::clear() + { + m_des.clear(); + zap(m_K1); + zap(m_K2); + } + +} diff --git a/comm/third_party/botan/src/lib/block/des/desx.h b/comm/third_party/botan/src/lib/block/des/desx.h new file mode 100644 index 0000000000..0189a99826 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/des/desx.h @@ -0,0 +1,37 @@ +/* +* DESX +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_DESX_H_ +#define BOTAN_DESX_H_ + +#include <botan/des.h> + +BOTAN_FUTURE_INTERNAL_HEADER(desx.h) + +namespace Botan { + +/** +* DESX: a DES object (m_des) used together with two extra byte-vector +* keys, m_K1 and m_K2 +*/ +class BOTAN_PUBLIC_API(2,0) DESX final : public Block_Cipher_Fixed_Params<8, 24> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "DESX"; } + BlockCipher* clone() const override { return new DESX; } + private: + void key_schedule(const uint8_t[], size_t) override; + secure_vector<uint8_t> m_K1, m_K2; + DES m_des; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/des/info.txt 
b/comm/third_party/botan/src/lib/block/des/info.txt new file mode 100644 index 0000000000..05f85b523c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/des/info.txt @@ -0,0 +1,3 @@ +<defines> +DES -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/gost_28147/gost_28147.cpp b/comm/third_party/botan/src/lib/block/gost_28147/gost_28147.cpp new file mode 100644 index 0000000000..2b8aa031e8 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/gost_28147/gost_28147.cpp @@ -0,0 +1,189 @@ +/* +* GOST 28147-89 +* (C) 1999-2009,2011 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/gost_28147.h> +#include <botan/exceptn.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +/* +* Return a single 4-bit sbox entry. Each table byte packs two entries: +* byte 4*col + row/2 holds the even row in its high nibble and the odd +* row in its low nibble. +*/ +uint8_t GOST_28147_89_Params::sbox_entry(size_t row, size_t col) const + { + const uint8_t x = m_sboxes[4 * col + (row / 2)]; + return (row % 2 == 0) ? (x >> 4) : (x & 0x0F); + } + +/* +* Build one byte from two packed table bytes of the same row: the high +* nibble of the byte at column (col % 16) becomes the low nibble of the +* result, and the low nibble of the byte at column (col / 16) becomes the +* high nibble (the shifted value is truncated to uint8_t on return). +*/ +uint8_t GOST_28147_89_Params::sbox_pair(size_t row, size_t col) const + { + const uint8_t x = m_sboxes[4 * (col % 16) + row]; + const uint8_t y = m_sboxes[4 * (col / 16) + row]; + return (x >> 4) | (y << 4); + } + +/* +* Select one of the two built-in 64-byte tables by name; any other name +* throws Invalid_Argument. +*/ +GOST_28147_89_Params::GOST_28147_89_Params(const std::string& n) : m_name(n) + { + // Encoded in the packed format from RFC 4357 + + // GostR3411_94_TestParamSet (OID 1.2.643.2.2.31.0) + static const uint8_t GOST_R_3411_TEST_PARAMS[64] = { + 0x4E, 0x57, 0x64, 0xD1, 0xAB, 0x8D, 0xCB, 0xBF, 0x94, 0x1A, 0x7A, + 0x4D, 0x2C, 0xD1, 0x10, 0x10, 0xD6, 0xA0, 0x57, 0x35, 0x8D, 0x38, + 0xF2, 0xF7, 0x0F, 0x49, 0xD1, 0x5A, 0xEA, 0x2F, 0x8D, 0x94, 0x62, + 0xEE, 0x43, 0x09, 0xB3, 0xF4, 0xA6, 0xA2, 0x18, 0xC6, 0x98, 0xE3, + 0xC1, 0x7C, 0xE5, 0x7E, 0x70, 0x6B, 0x09, 0x66, 0xF7, 0x02, 0x3C, + 0x8B, 0x55, 0x95, 0xBF, 0x28, 0x39, 0xB3, 0x2E, 0xCC }; + + // GostR3411-94-CryptoProParamSet (OID 1.2.643.2.2.31.1) + static const uint8_t GOST_R_3411_CRYPTOPRO_PARAMS[64] = { + 0xA5, 0x74, 0x77, 0xD1, 0x4F, 0xFA, 0x66, 0xE3, 
0x54, 0xC7, 0x42, + 0x4A, 0x60, 0xEC, 0xB4, 0x19, 0x82, 0x90, 0x9D, 0x75, 0x1D, 0x4F, + 0xC9, 0x0B, 0x3B, 0x12, 0x2F, 0x54, 0x79, 0x08, 0xA0, 0xAF, 0xD1, + 0x3E, 0x1A, 0x38, 0xC7, 0xB1, 0x81, 0xC6, 0xE6, 0x56, 0x05, 0x87, + 0x03, 0x25, 0xEB, 0xFE, 0x9C, 0x6D, 0xF8, 0x6D, 0x2E, 0xAB, 0xDE, + 0x20, 0xBA, 0x89, 0x3C, 0x92, 0xF8, 0xD3, 0x53, 0xBC }; + + if(m_name == "R3411_94_TestParam") + m_sboxes = GOST_R_3411_TEST_PARAMS; + else if(m_name == "R3411_CryptoPro") + m_sboxes = GOST_R_3411_CRYPTOPRO_PARAMS; + else + throw Invalid_Argument("GOST_28147_89_Params: Unknown " + m_name); + } + +/* +* GOST Constructor +* +* Fills m_SBOX (1024 words) with four 256-entry tables stored back to +* back. Table k holds param.sbox_pair(k, i) widened to 32 bits and +* rotated left by 11, 19, 27 and 3 bits for k = 0, 1, 2, 3. +*/ +GOST_28147_89::GOST_28147_89(const GOST_28147_89_Params& param) : m_SBOX(1024) + { + // Convert the parallel 4x4 sboxes into larger word-based sboxes + + for(size_t i = 0; i != 256; ++i) + { + m_SBOX[i ] = rotl<11, uint32_t>(param.sbox_pair(0, i)); + m_SBOX[i+256] = rotl<19, uint32_t>(param.sbox_pair(1, i)); + m_SBOX[i+512] = rotl<27, uint32_t>(param.sbox_pair(2, i)); + m_SBOX[i+768] = rotl< 3, uint32_t>(param.sbox_pair(3, i)); + } + } + +std::string GOST_28147_89::name() const + { + /* + 'Guess' the right name for the sbox on the basis of the values. + This would need to be updated if support for other sbox parameters + is added. Preferably, we would just store the string value in the + constructor, but can't break binary compat. 
+ */ + std::string sbox_name = ""; + if(m_SBOX[0] == 0x00072000) + sbox_name = "R3411_94_TestParam"; + else if(m_SBOX[0] == 0x0002D000) + sbox_name = "R3411_CryptoPro"; + else + throw Internal_Error("GOST-28147 unrecognized sbox value"); + + return "GOST-28147-89(" + sbox_name + ")"; + } + +/* +* Two rounds of GOST +* +* N1 and N2 are the two 32-bit halves of the block; R1 and R2 index the +* round keys in m_EK. Each round adds a round key to one half, looks up +* each byte of the sum in its own 256-entry slice of m_SBOX, ORs the four +* results together and XORs that into the other half. +*/ +#define GOST_2ROUND(N1, N2, R1, R2) \ + do { \ + uint32_t T0 = N1 + m_EK[R1]; \ + N2 ^= m_SBOX[get_byte(3, T0)] | \ + m_SBOX[get_byte(2, T0)+256] | \ + m_SBOX[get_byte(1, T0)+512] | \ + m_SBOX[get_byte(0, T0)+768]; \ + \ + uint32_t T1 = N2 + m_EK[R2]; \ + N1 ^= m_SBOX[get_byte(3, T1)] | \ + m_SBOX[get_byte(2, T1)+256] | \ + m_SBOX[get_byte(1, T1)+512] | \ + m_SBOX[get_byte(0, T1)+768]; \ + } while(0) + +/* +* GOST Encryption +* +* Round keys 0..7 are used three times in ascending order and then once +* in descending order; the two halves are stored swapped (N2, N1). +*/ +void GOST_28147_89::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t N1 = load_le<uint32_t>(in, 0); + uint32_t N2 = load_le<uint32_t>(in, 1); + + for(size_t j = 0; j != 3; ++j) + { + GOST_2ROUND(N1, N2, 0, 1); + GOST_2ROUND(N1, N2, 2, 3); + GOST_2ROUND(N1, N2, 4, 5); + GOST_2ROUND(N1, N2, 6, 7); + } + + GOST_2ROUND(N1, N2, 7, 6); + GOST_2ROUND(N1, N2, 5, 4); + GOST_2ROUND(N1, N2, 3, 2); + GOST_2ROUND(N1, N2, 1, 0); + + store_le(out, N2, N1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* GOST Decryption +* +* Round keys 0..7 are used once in ascending order and then three times +* in descending order; the two halves are stored swapped (N2, N1). +*/ +void GOST_28147_89::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t N1 = load_le<uint32_t>(in, 0); + uint32_t N2 = load_le<uint32_t>(in, 1); + + GOST_2ROUND(N1, N2, 0, 1); + GOST_2ROUND(N1, N2, 2, 3); + GOST_2ROUND(N1, N2, 4, 5); + GOST_2ROUND(N1, N2, 6, 7); + + for(size_t j = 0; j != 3; ++j) + { + GOST_2ROUND(N1, N2, 7, 6); + GOST_2ROUND(N1, N2, 5, 4); + GOST_2ROUND(N1, N2, 3, 2); + GOST_2ROUND(N1, N2, 1, 0); + } + + store_le(out, N2, N1); + in += BLOCK_SIZE; + out += 
BLOCK_SIZE; + } + } + +/* +* GOST Key Schedule +* +* Resizes m_EK to eight words and fills word i with +* load_le<uint32_t>(key, i). The length argument is not used here. +*/ +void GOST_28147_89::key_schedule(const uint8_t key[], size_t) + { + m_EK.resize(8); + for(size_t i = 0; i != 8; ++i) + m_EK[i] = load_le<uint32_t>(key, i); + } + +/* +* Call zap() on the round keys; m_SBOX is left as it is +*/ +void GOST_28147_89::clear() + { + zap(m_EK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/gost_28147/gost_28147.h b/comm/third_party/botan/src/lib/block/gost_28147/gost_28147.h new file mode 100644 index 0000000000..f71bb28bd9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/gost_28147/gost_28147.h @@ -0,0 +1,95 @@ +/* +* GOST 28147-89 +* (C) 1999-2009 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_GOST_28147_89_H_ +#define BOTAN_GOST_28147_89_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(gost_28147.h) + +namespace Botan { + +/** +* The GOST 28147-89 block cipher uses a set of 4-bit Sboxes; however, +* the standard does not actually define these Sboxes; they are +* considered a local configuration issue. Several different sets are +* used. 
+*/ +class BOTAN_PUBLIC_API(2,0) GOST_28147_89_Params final + { + public: + /** + * @param row the row + * @param col the column + * @return sbox entry at this row/column + */ + uint8_t sbox_entry(size_t row, size_t col) const; + + /** + * @return name of this parameter set + */ + std::string param_name() const { return m_name; } + + /** + * Return a representation used for building larger tables + * For internal use + */ + uint8_t sbox_pair(size_t row, size_t col) const; + + /** + * Default GOST parameters are the ones given in GOST R 34.11 for + * testing purposes; these sboxes are also used by Crypto++, and, + * at least according to Wikipedia, the Central Bank of the Russian + * Federation + * @param name the name of the parameter set + */ + explicit GOST_28147_89_Params(const std::string& name = "R3411_94_TestParam"); + private: + const uint8_t* m_sboxes; + std::string m_name; + }; + +/** +* GOST 28147-89 +*/ +class BOTAN_PUBLIC_API(2,0) GOST_28147_89 final : public Block_Cipher_Fixed_Params<8, 32> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + + std::string name() const override; + BlockCipher* clone() const override { return new GOST_28147_89(m_SBOX); } + + /** + * @param params the sbox parameters to use + */ + explicit GOST_28147_89(const GOST_28147_89_Params& params); + + /** + * @param param_name name of the sbox parameter set to use + */ + explicit GOST_28147_89(const std::string& param_name) : + GOST_28147_89(GOST_28147_89_Params(param_name)) {} + private: + /* + * Used by clone(): copies the expanded sbox only. m_EK is left + * empty so that the copy counts as unkeyed until key_schedule() + * fills it; pre-sizing m_EK here would make an unkeyed clone look + * keyed and run with an all-zero key + */ + explicit GOST_28147_89(const std::vector<uint32_t>& other_SBOX) : + m_SBOX(other_SBOX) {} + + void key_schedule(const uint8_t[], size_t) override; + + /* + * The sbox is not secret, this is just a larger expansion of it + * which we generate at runtime for faster execution + */ + std::vector<uint32_t> m_SBOX; + + /* + * Round keys; empty until key_schedule() has run + */ + secure_vector<uint32_t> m_EK; + }; + +} + +#endif diff --git 
a/comm/third_party/botan/src/lib/block/gost_28147/info.txt b/comm/third_party/botan/src/lib/block/gost_28147/info.txt new file mode 100644 index 0000000000..17fc971591 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/gost_28147/info.txt @@ -0,0 +1,3 @@ +<defines> +GOST_28147_89 -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/idea/idea.cpp b/comm/third_party/botan/src/lib/block/idea/idea.cpp new file mode 100644 index 0000000000..f8f5ceb348 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea.cpp @@ -0,0 +1,240 @@ +/* +* IDEA +* (C) 1999-2010,2015 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/idea.h> +#include <botan/loadstor.h> +#include <botan/cpuid.h> +#include <botan/internal/ct_utils.h> + +namespace Botan { + +namespace { + +/* +* Multiplication modulo 65537 +* +* P is the 32-bit product x*y. When P is zero (at least one operand is +* zero) the result is 1 - x - y; otherwise it is P_lo - P_hi, plus one +* when P_lo < P_hi. The choice between the two is made with +* CT::Mask::select rather than with a branch. +*/ +inline uint16_t mul(uint16_t x, uint16_t y) + { + const uint32_t P = static_cast<uint32_t>(x) * y; + const auto P_mask = CT::Mask<uint16_t>(CT::Mask<uint32_t>::is_zero(P)); + + const uint32_t P_hi = P >> 16; + const uint32_t P_lo = P & 0xFFFF; + + const uint16_t carry = (P_lo < P_hi); + const uint16_t r_1 = static_cast<uint16_t>((P_lo - P_hi) + carry); + const uint16_t r_2 = 1 - x - y; + + return P_mask.select(r_2, r_1); + } + +/* +* Find multiplicative inverses modulo 65537 +* +* 65537 is prime; thus Fermat's little theorem tells us that +* x^65537 == x modulo 65537, which means +* x^(65537-2) == x^-1 modulo 65537 since +* x^(65537-2) * x == 1 mod 65537 +* +* Do the exponentiation with a basic square and multiply: all 16 bits +* of the exponent 65535 are 1, so we always multiply. Starting from +* y = x, the 15 square-and-multiply steps below give x^(2^16 - 1). +*/ +uint16_t mul_inv(uint16_t x) + { + uint16_t y = x; + + for(size_t i = 0; i != 15; ++i) + { + y = mul(y, y); // square + y = mul(y, x); + } + + return y; + } + +/** +* IDEA is involutional, depending only on the key schedule +*/ +void idea_op(const uint8_t in[], uint8_t out[], size_t blocks, const uint16_t K[52]) + { + 
const size_t BLOCK_SIZE = 8; + + CT::poison(in, blocks * 8); + CT::poison(out, blocks * 8); + CT::poison(K, 52); + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks; ++i) + { + uint16_t X1, X2, X3, X4; + load_be(in + BLOCK_SIZE*i, X1, X2, X3, X4); + + for(size_t j = 0; j != 8; ++j) + { + X1 = mul(X1, K[6*j+0]); + X2 += K[6*j+1]; + X3 += K[6*j+2]; + X4 = mul(X4, K[6*j+3]); + + const uint16_t T0 = X3; + X3 = mul(X3 ^ X1, K[6*j+4]); + + const uint16_t T1 = X2; + X2 = mul((X2 ^ X4) + X3, K[6*j+5]); + X3 += X2; + + X1 ^= X2; + X4 ^= X3; + X2 ^= T0; + X3 ^= T1; + } + + X1 = mul(X1, K[48]); + X2 += K[50]; + X3 += K[49]; + X4 = mul(X4, K[51]); + + store_be(out + BLOCK_SIZE*i, X1, X3, X2, X4); + } + + CT::unpoison(in, blocks * 8); + CT::unpoison(out, blocks * 8); + CT::unpoison(K, 52); + } + +} + +size_t IDEA::parallelism() const + { +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + return 8; + } +#endif + + return 1; + } + +std::string IDEA::provider() const + { +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + return "sse2"; + } +#endif + + return "base"; + } + +/* +* IDEA Encryption +*/ +void IDEA::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + while(blocks >= 8) + { + sse2_idea_op_8(in, out, m_EK.data()); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + + idea_op(in, out, blocks, m_EK.data()); + } + +/* +* IDEA Decryption +*/ +void IDEA::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + +#if defined(BOTAN_HAS_IDEA_SSE2) + if(CPUID::has_sse2()) + { + while(blocks >= 8) + { + sse2_idea_op_8(in, out, m_DK.data()); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + + idea_op(in, out, blocks, m_DK.data()); + } + +/* +* IDEA Key Schedule +*/ +void IDEA::key_schedule(const uint8_t key[], size_t) + { + 
m_EK.resize(52); + m_DK.resize(52); + + CT::poison(key, 16); + CT::poison(m_EK.data(), 52); + CT::poison(m_DK.data(), 52); + + secure_vector<uint64_t> K(2); + + K[0] = load_be<uint64_t>(key, 0); + K[1] = load_be<uint64_t>(key, 1); + + for(size_t off = 0; off != 48; off += 8) + { + for(size_t i = 0; i != 8; ++i) + m_EK[off+i] = static_cast<uint16_t>(K[i/4] >> (48-16*(i % 4))); + + const uint64_t Kx = (K[0] >> 39); + const uint64_t Ky = (K[1] >> 39); + + K[0] = (K[0] << 25) | Ky; + K[1] = (K[1] << 25) | Kx; + } + + for(size_t i = 0; i != 4; ++i) + m_EK[48+i] = static_cast<uint16_t>(K[i/4] >> (48-16*(i % 4))); + + m_DK[0] = mul_inv(m_EK[48]); + m_DK[1] = -m_EK[49]; + m_DK[2] = -m_EK[50]; + m_DK[3] = mul_inv(m_EK[51]); + + for(size_t i = 0; i != 8*6; i += 6) + { + m_DK[i+4] = m_EK[46-i]; + m_DK[i+5] = m_EK[47-i]; + m_DK[i+6] = mul_inv(m_EK[42-i]); + m_DK[i+7] = -m_EK[44-i]; + m_DK[i+8] = -m_EK[43-i]; + m_DK[i+9] = mul_inv(m_EK[45-i]); + } + + std::swap(m_DK[49], m_DK[50]); + + CT::unpoison(key, 16); + CT::unpoison(m_EK.data(), 52); + CT::unpoison(m_DK.data(), 52); + } + +void IDEA::clear() + { + zap(m_EK); + zap(m_DK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/idea/idea.h b/comm/third_party/botan/src/lib/block/idea/idea.h new file mode 100644 index 0000000000..e5e51606b9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea.h @@ -0,0 +1,45 @@ +/* +* IDEA +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_IDEA_H_ +#define BOTAN_IDEA_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(idea.h) + +namespace Botan { + +/** +* IDEA +*/ +class BOTAN_PUBLIC_API(2,0) IDEA final : public Block_Cipher_Fixed_Params<8, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + + std::string provider() const 
override; + std::string name() const override { return "IDEA"; } + BlockCipher* clone() const override { return new IDEA; } + size_t parallelism() const override; + + private: +#if defined(BOTAN_HAS_IDEA_SSE2) + void sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const; +#endif + + void key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint16_t> m_EK, m_DK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp b/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp new file mode 100644 index 0000000000..93648cfc7a --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea_sse2/idea_sse2.cpp @@ -0,0 +1,208 @@ +/* +* IDEA in SSE2 +* (C) 2009 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/idea.h> +#include <botan/internal/ct_utils.h> +#include <emmintrin.h> + +namespace Botan { + +namespace { + +BOTAN_FUNC_ISA("sse2") +inline __m128i mul(__m128i X, uint16_t K_16) + { + const __m128i zeros = _mm_set1_epi16(0); + const __m128i ones = _mm_set1_epi16(1); + + const __m128i K = _mm_set1_epi16(K_16); + + const __m128i X_is_zero = _mm_cmpeq_epi16(X, zeros); + const __m128i K_is_zero = _mm_cmpeq_epi16(K, zeros); + + const __m128i mul_lo = _mm_mullo_epi16(X, K); + const __m128i mul_hi = _mm_mulhi_epu16(X, K); + + __m128i T = _mm_sub_epi16(mul_lo, mul_hi); + + // Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0 + const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo); + const __m128i cmp = _mm_min_epu8( + _mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones); + + T = _mm_add_epi16(T, cmp); + + /* Selection: if X[i] is zero then assign 1-K + if K is zero then assign 1-X[i] + + Could if() off value of K_16 for the second, but this gives a + constant time implementation which is a nice bonus. 
+ */ + + T = _mm_or_si128( + _mm_andnot_si128(X_is_zero, T), + _mm_and_si128(_mm_sub_epi16(ones, K), X_is_zero)); + + T = _mm_or_si128( + _mm_andnot_si128(K_is_zero, T), + _mm_and_si128(_mm_sub_epi16(ones, X), K_is_zero)); + + return T; + } + +/* +* 4x8 matrix transpose +* +* FIXME: why do I need the extra set of unpack_epi32 here? Inverse in +* transpose_out doesn't need it. Something with the shuffle? Removing +* that extra unpack could easily save 3-4 cycles per block, and would +* also help a lot with register pressure on 32-bit x86 +*/ +BOTAN_FUNC_ISA("sse2") +void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) + { + __m128i T0 = _mm_unpackhi_epi32(B0, B1); + __m128i T1 = _mm_unpacklo_epi32(B0, B1); + __m128i T2 = _mm_unpackhi_epi32(B2, B3); + __m128i T3 = _mm_unpacklo_epi32(B2, B3); + + __m128i T4 = _mm_unpacklo_epi32(T0, T1); + __m128i T5 = _mm_unpackhi_epi32(T0, T1); + __m128i T6 = _mm_unpacklo_epi32(T2, T3); + __m128i T7 = _mm_unpackhi_epi32(T2, T3); + + T0 = _mm_shufflehi_epi16(T4, _MM_SHUFFLE(1, 3, 0, 2)); + T1 = _mm_shufflehi_epi16(T5, _MM_SHUFFLE(1, 3, 0, 2)); + T2 = _mm_shufflehi_epi16(T6, _MM_SHUFFLE(1, 3, 0, 2)); + T3 = _mm_shufflehi_epi16(T7, _MM_SHUFFLE(1, 3, 0, 2)); + + T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(1, 3, 0, 2)); + T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(1, 3, 0, 2)); + T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(1, 3, 0, 2)); + T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(1, 3, 0, 2)); + + T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + B0 = _mm_unpacklo_epi64(T0, T2); + B1 = _mm_unpackhi_epi64(T0, T2); + B2 = _mm_unpacklo_epi64(T1, T3); + B3 = _mm_unpackhi_epi64(T1, T3); + } + +/* +* 4x8 matrix transpose (reverse) +*/ +BOTAN_FUNC_ISA("sse2") +void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) + { + __m128i T0 = 
_mm_unpacklo_epi64(B0, B1); + __m128i T1 = _mm_unpacklo_epi64(B2, B3); + __m128i T2 = _mm_unpackhi_epi64(B0, B1); + __m128i T3 = _mm_unpackhi_epi64(B2, B3); + + T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + T0 = _mm_shufflehi_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shufflehi_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shufflehi_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shufflehi_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0)); + T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0)); + T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0)); + T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0)); + + B0 = _mm_unpacklo_epi32(T0, T1); + B1 = _mm_unpackhi_epi32(T0, T1); + B2 = _mm_unpacklo_epi32(T2, T3); + B3 = _mm_unpackhi_epi32(T2, T3); + } + +} + +/* +* 8 wide IDEA encryption/decryption in SSE2 +*/ +BOTAN_FUNC_ISA("sse2") +void IDEA::sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) const + { + CT::poison(in, 64); + CT::poison(out, 64); + CT::poison(EK, 52); + + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + + __m128i B0 = _mm_loadu_si128(in_mm + 0); + __m128i B1 = _mm_loadu_si128(in_mm + 1); + __m128i B2 = _mm_loadu_si128(in_mm + 2); + __m128i B3 = _mm_loadu_si128(in_mm + 3); + + transpose_in(B0, B1, B2, B3); + + // byte swap + B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8)); + B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8)); + B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8)); + B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8)); + + for(size_t i = 0; i != 8; ++i) + { + B0 = mul(B0, EK[6*i+0]); + B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[6*i+1])); + B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[6*i+2])); + B3 = mul(B3, EK[6*i+3]); + + 
__m128i T0 = B2; + B2 = _mm_xor_si128(B2, B0); + B2 = mul(B2, EK[6*i+4]); + + __m128i T1 = B1; + + B1 = _mm_xor_si128(B1, B3); + B1 = _mm_add_epi16(B1, B2); + B1 = mul(B1, EK[6*i+5]); + + B2 = _mm_add_epi16(B2, B1); + + B0 = _mm_xor_si128(B0, B1); + B1 = _mm_xor_si128(B1, T0); + B3 = _mm_xor_si128(B3, B2); + B2 = _mm_xor_si128(B2, T1); + } + + B0 = mul(B0, EK[48]); + B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[50])); + B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[49])); + B3 = mul(B3, EK[51]); + + // byte swap + B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8)); + B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8)); + B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8)); + B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8)); + + transpose_out(B0, B2, B1, B3); + + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + _mm_storeu_si128(out_mm + 0, B0); + _mm_storeu_si128(out_mm + 1, B2); + _mm_storeu_si128(out_mm + 2, B1); + _mm_storeu_si128(out_mm + 3, B3); + + CT::unpoison(in, 64); + CT::unpoison(out, 64); + CT::unpoison(EK, 52); + } + +} diff --git a/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt b/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt new file mode 100644 index 0000000000..b0ca2d02fa --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/idea_sse2/info.txt @@ -0,0 +1,7 @@ +<defines> +IDEA_SSE2 -> 20131128 +</defines> + +<isa> +sse2 +</isa> diff --git a/comm/third_party/botan/src/lib/block/idea/info.txt b/comm/third_party/botan/src/lib/block/idea/info.txt new file mode 100644 index 0000000000..bcbdce03f1 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/idea/info.txt @@ -0,0 +1,3 @@ +<defines> +IDEA -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/info.txt b/comm/third_party/botan/src/lib/block/info.txt new file mode 100644 index 0000000000..b03a8c8f59 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/info.txt @@ -0,0 +1,7 @@ +<defines> 
+BLOCK_CIPHER -> 20131128 +</defines> + +<header:public> +block_cipher.h +</header:public> diff --git a/comm/third_party/botan/src/lib/block/kasumi/info.txt b/comm/third_party/botan/src/lib/block/kasumi/info.txt new file mode 100644 index 0000000000..b48183ca90 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/kasumi/info.txt @@ -0,0 +1,3 @@ +<defines> +KASUMI -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/kasumi/kasumi.cpp b/comm/third_party/botan/src/lib/block/kasumi/kasumi.cpp new file mode 100644 index 0000000000..819567b1c1 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/kasumi/kasumi.cpp @@ -0,0 +1,238 @@ +/* +* KASUMI +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/kasumi.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +namespace { + +/* +* KASUMI S-Boxes +* +* S7 has 128 entries and S9 has 512 entries; FI below indexes them with +* its 7-bit and 9-bit halves. +*/ +alignas(64) const uint8_t KASUMI_SBOX_S7[128] = { + 0x36, 0x32, 0x3E, 0x38, 0x16, 0x22, 0x5E, 0x60, 0x26, 0x06, 0x3F, 0x5D, + 0x02, 0x12, 0x7B, 0x21, 0x37, 0x71, 0x27, 0x72, 0x15, 0x43, 0x41, 0x0C, + 0x2F, 0x49, 0x2E, 0x1B, 0x19, 0x6F, 0x7C, 0x51, 0x35, 0x09, 0x79, 0x4F, + 0x34, 0x3C, 0x3A, 0x30, 0x65, 0x7F, 0x28, 0x78, 0x68, 0x46, 0x47, 0x2B, + 0x14, 0x7A, 0x48, 0x3D, 0x17, 0x6D, 0x0D, 0x64, 0x4D, 0x01, 0x10, 0x07, + 0x52, 0x0A, 0x69, 0x62, 0x75, 0x74, 0x4C, 0x0B, 0x59, 0x6A, 0x00, 0x7D, + 0x76, 0x63, 0x56, 0x45, 0x1E, 0x39, 0x7E, 0x57, 0x70, 0x33, 0x11, 0x05, + 0x5F, 0x0E, 0x5A, 0x54, 0x5B, 0x08, 0x23, 0x67, 0x20, 0x61, 0x1C, 0x42, + 0x66, 0x1F, 0x1A, 0x2D, 0x4B, 0x04, 0x55, 0x5C, 0x25, 0x4A, 0x50, 0x31, + 0x44, 0x1D, 0x73, 0x2C, 0x40, 0x6B, 0x6C, 0x18, 0x6E, 0x53, 0x24, 0x4E, + 0x2A, 0x13, 0x0F, 0x29, 0x58, 0x77, 0x3B, 0x03 }; + +alignas(64) const uint16_t KASUMI_SBOX_S9[512] = { + 0x00A7, 0x00EF, 0x00A1, 0x017B, 0x0187, 0x014E, 0x0009, 0x0152, 0x0026, + 0x00E2, 0x0030, 0x0166, 0x01C4, 0x0181, 0x005A, 0x018D, 0x00B7, 0x00FD, + 0x0093, 0x014B, 
0x019F, 0x0154, 0x0033, 0x016A, 0x0132, 0x01F4, 0x0106, + 0x0052, 0x00D8, 0x009F, 0x0164, 0x00B1, 0x00AF, 0x00F1, 0x01E9, 0x0025, + 0x00CE, 0x0011, 0x0000, 0x014D, 0x002C, 0x00FE, 0x017A, 0x003A, 0x008F, + 0x00DC, 0x0051, 0x0190, 0x005F, 0x0003, 0x013B, 0x00F5, 0x0036, 0x00EB, + 0x00DA, 0x0195, 0x01D8, 0x0108, 0x00AC, 0x01EE, 0x0173, 0x0122, 0x018F, + 0x004C, 0x00A5, 0x00C5, 0x018B, 0x0079, 0x0101, 0x01E0, 0x01A7, 0x00D4, + 0x00F0, 0x001C, 0x01CE, 0x00B0, 0x0196, 0x01FB, 0x0120, 0x00DF, 0x01F5, + 0x0197, 0x00F9, 0x0109, 0x0059, 0x00BA, 0x00DD, 0x01AC, 0x00A4, 0x004A, + 0x01B8, 0x00C4, 0x01CA, 0x01A5, 0x015E, 0x00A3, 0x00E8, 0x009E, 0x0086, + 0x0162, 0x000D, 0x00FA, 0x01EB, 0x008E, 0x00BF, 0x0045, 0x00C1, 0x01A9, + 0x0098, 0x00E3, 0x016E, 0x0087, 0x0158, 0x012C, 0x0114, 0x00F2, 0x01B5, + 0x0140, 0x0071, 0x0116, 0x000B, 0x00F3, 0x0057, 0x013D, 0x0024, 0x005D, + 0x01F0, 0x001B, 0x01E7, 0x01BE, 0x01E2, 0x0029, 0x0044, 0x009C, 0x01C9, + 0x0083, 0x0146, 0x0193, 0x0153, 0x0014, 0x0027, 0x0073, 0x01BA, 0x007C, + 0x01DB, 0x0180, 0x01FC, 0x0035, 0x0070, 0x00AA, 0x01DF, 0x0097, 0x007E, + 0x00A9, 0x0049, 0x010C, 0x0117, 0x0141, 0x00A8, 0x016C, 0x016B, 0x0124, + 0x002E, 0x01F3, 0x0189, 0x0147, 0x0144, 0x0018, 0x01C8, 0x010B, 0x009D, + 0x01CC, 0x01E8, 0x01AA, 0x0135, 0x00E5, 0x01B7, 0x01FA, 0x00D0, 0x010F, + 0x015D, 0x0191, 0x01B2, 0x00EC, 0x0010, 0x00D1, 0x0167, 0x0034, 0x0038, + 0x0078, 0x00C7, 0x0115, 0x01D1, 0x01A0, 0x00FC, 0x011F, 0x00F6, 0x0006, + 0x0053, 0x0131, 0x01A4, 0x0159, 0x0099, 0x01F6, 0x0041, 0x003D, 0x00F4, + 0x011A, 0x00AD, 0x00DE, 0x01A2, 0x0043, 0x0182, 0x0170, 0x0105, 0x0065, + 0x01DC, 0x0123, 0x00C3, 0x01AE, 0x0031, 0x004F, 0x00A6, 0x014A, 0x0118, + 0x017F, 0x0175, 0x0080, 0x017E, 0x0198, 0x009B, 0x01EF, 0x016F, 0x0184, + 0x0112, 0x006B, 0x01CB, 0x01A1, 0x003E, 0x01C6, 0x0084, 0x00E1, 0x00CB, + 0x013C, 0x00EA, 0x000E, 0x012D, 0x005B, 0x01F7, 0x011E, 0x01A8, 0x00D3, + 0x015B, 0x0133, 0x008C, 0x0176, 0x0023, 0x0067, 0x007D, 0x01AB, 0x0013, + 0x00D6, 0x01C5, 
0x0092, 0x01F2, 0x013A, 0x01BC, 0x00E6, 0x0100, 0x0149, + 0x00C6, 0x011D, 0x0032, 0x0074, 0x004E, 0x019A, 0x000A, 0x00CD, 0x01FE, + 0x00AB, 0x00E7, 0x002D, 0x008B, 0x01D3, 0x001D, 0x0056, 0x01F9, 0x0020, + 0x0048, 0x001A, 0x0156, 0x0096, 0x0139, 0x01EA, 0x01AF, 0x00EE, 0x019B, + 0x0145, 0x0095, 0x01D9, 0x0028, 0x0077, 0x00AE, 0x0163, 0x00B9, 0x00E9, + 0x0185, 0x0047, 0x01C0, 0x0111, 0x0174, 0x0037, 0x006E, 0x00B2, 0x0142, + 0x000C, 0x01D5, 0x0188, 0x0171, 0x00BE, 0x0001, 0x006D, 0x0177, 0x0089, + 0x00B5, 0x0058, 0x004B, 0x0134, 0x0104, 0x01E4, 0x0062, 0x0110, 0x0172, + 0x0113, 0x019C, 0x006F, 0x0150, 0x013E, 0x0004, 0x01F8, 0x01EC, 0x0103, + 0x0130, 0x004D, 0x0151, 0x01B3, 0x0015, 0x0165, 0x012F, 0x014C, 0x01E3, + 0x0012, 0x002F, 0x0055, 0x0019, 0x01F1, 0x01DA, 0x0121, 0x0064, 0x010D, + 0x0128, 0x01DE, 0x010E, 0x006A, 0x001F, 0x0068, 0x01B1, 0x0054, 0x019E, + 0x01E6, 0x018A, 0x0060, 0x0063, 0x009A, 0x01FF, 0x0094, 0x019D, 0x0169, + 0x0199, 0x00FF, 0x00A2, 0x00D7, 0x012E, 0x00C9, 0x010A, 0x015F, 0x0157, + 0x0090, 0x01B9, 0x016D, 0x006C, 0x012A, 0x00FB, 0x0022, 0x00B6, 0x01FD, + 0x008A, 0x00D2, 0x014F, 0x0085, 0x0137, 0x0160, 0x0148, 0x008D, 0x018C, + 0x015A, 0x007B, 0x013F, 0x01C2, 0x0119, 0x01AD, 0x00E4, 0x01BB, 0x01E1, + 0x005C, 0x0194, 0x01E5, 0x01A6, 0x00F8, 0x0129, 0x0017, 0x00D5, 0x0082, + 0x01D2, 0x0016, 0x00D9, 0x011B, 0x0046, 0x0126, 0x0168, 0x01A3, 0x007F, + 0x0138, 0x0179, 0x0007, 0x01D4, 0x00C2, 0x0002, 0x0075, 0x0127, 0x01CF, + 0x0102, 0x00E0, 0x01BF, 0x00F7, 0x00BB, 0x0050, 0x018E, 0x011C, 0x0161, + 0x0069, 0x0186, 0x012B, 0x01D7, 0x01D6, 0x00B8, 0x0039, 0x00C8, 0x015C, + 0x003F, 0x00CC, 0x00BC, 0x0021, 0x01C3, 0x0061, 0x001E, 0x0136, 0x00DB, + 0x005E, 0x00A0, 0x0081, 0x01ED, 0x0040, 0x00B3, 0x0107, 0x0066, 0x00BD, + 0x00CF, 0x0072, 0x0192, 0x01B6, 0x01DD, 0x0183, 0x007A, 0x00C0, 0x002A, + 0x017D, 0x0005, 0x0091, 0x0076, 0x00B4, 0x01C1, 0x0125, 0x0143, 0x0088, + 0x017C, 0x002B, 0x0042, 0x003C, 0x01C7, 0x0155, 0x01BD, 0x00CA, 0x01B0, + 0x0008, 0x00ED, 
0x000F, 0x0178, 0x01B4, 0x01D0, 0x003B, 0x01CD }; + +/* +* KASUMI FI Function +* +* I is split into a 9-bit half (I >> 7) and a 7-bit half (I & 0x7F), +* which go through S9 and S7. K is mixed in between the two S-box +* layers as K >> 9 and K & 0x1FF. The result is (D7 << 9) | D9. +*/ +uint16_t FI(uint16_t I, uint16_t K) + { + uint16_t D9 = (I >> 7); + uint8_t D7 = (I & 0x7F); + D9 = KASUMI_SBOX_S9[D9] ^ D7; + D7 = KASUMI_SBOX_S7[D7] ^ (D9 & 0x7F); + + D7 ^= (K >> 9); + D9 = KASUMI_SBOX_S9[D9 ^ (K & 0x1FF)] ^ D7; + D7 = KASUMI_SBOX_S7[D7] ^ (D9 & 0x7F); + return static_cast<uint16_t>(D7 << 9) | D9; + } + +} + +/* +* KASUMI Encryption +* +* Each pass of the inner loop takes 16 subkeys (K[0..15], starting at +* m_EK[8*j]) and updates all four 16-bit words of the block. +*/ +void KASUMI::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint16_t B0 = load_be<uint16_t>(in, 0); + uint16_t B1 = load_be<uint16_t>(in, 1); + uint16_t B2 = load_be<uint16_t>(in, 2); + uint16_t B3 = load_be<uint16_t>(in, 3); + + for(size_t j = 0; j != 8; j += 2) + { + const uint16_t* K = &m_EK[8*j]; + + uint16_t R = B1 ^ (rotl<1>(B0) & K[0]); + uint16_t L = B0 ^ (rotl<1>(R) | K[1]); + + L = FI(L ^ K[ 2], K[ 3]) ^ R; + R = FI(R ^ K[ 4], K[ 5]) ^ L; + L = FI(L ^ K[ 6], K[ 7]) ^ R; + + R = B2 ^= R; + L = B3 ^= L; + + R = FI(R ^ K[10], K[11]) ^ L; + L = FI(L ^ K[12], K[13]) ^ R; + R = FI(R ^ K[14], K[15]) ^ L; + + R ^= (rotl<1>(L) & K[8]); + L ^= (rotl<1>(R) | K[9]); + + B0 ^= L; + B1 ^= R; + } + + store_be(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* KASUMI Decryption +* +* Uses the same m_EK as encryption; the 16-subkey groups are taken in +* reverse order (starting at m_EK[8*(6-j)]). +*/ +void KASUMI::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint16_t B0 = load_be<uint16_t>(in, 0); + uint16_t B1 = load_be<uint16_t>(in, 1); + uint16_t B2 = load_be<uint16_t>(in, 2); + uint16_t B3 = load_be<uint16_t>(in, 3); + + for(size_t j = 0; j != 8; j += 2) + { + const uint16_t* K = &m_EK[8*(6-j)]; + + uint16_t L = B2, R = B3; + + L = FI(L ^ K[10], K[11]) ^ R; + R = FI(R ^ K[12], K[13]) ^ L; + L = FI(L ^ K[14], K[15]) ^ R; + + L ^= (rotl<1>(R) & K[8]); + R ^= (rotl<1>(L) | K[9]); + + R = B0 ^= R; + L = B1 
^= L; + + L ^= (rotl<1>(R) & K[0]); + R ^= (rotl<1>(L) | K[1]); + + R = FI(R ^ K[2], K[3]) ^ L; + L = FI(L ^ K[4], K[5]) ^ R; + R = FI(R ^ K[6], K[7]) ^ L; + + B2 ^= L; + B3 ^= R; + } + + store_be(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* KASUMI Key Schedule +* +* K[0..7] hold the eight big-endian 16-bit key words and K[8..15] the +* same words XORed with RC. m_EK receives 8 subkeys per index i, 64 in +* total. The length argument is not used. +*/ +void KASUMI::key_schedule(const uint8_t key[], size_t) + { + static const uint16_t RC[] = { 0x0123, 0x4567, 0x89AB, 0xCDEF, + 0xFEDC, 0xBA98, 0x7654, 0x3210 }; + + secure_vector<uint16_t> K(16); + for(size_t i = 0; i != 8; ++i) + { + K[i] = load_be<uint16_t>(key, i); + K[i+8] = K[i] ^ RC[i]; + } + + m_EK.resize(64); + + for(size_t i = 0; i != 8; ++i) + { + m_EK[8*i ] = rotl<2>(K[(i+0) % 8]); + m_EK[8*i+1] = rotl<1>(K[(i+2) % 8 + 8]); + m_EK[8*i+2] = rotl<5>(K[(i+1) % 8]); + m_EK[8*i+3] = K[(i+4) % 8 + 8]; + m_EK[8*i+4] = rotl<8>(K[(i+5) % 8]); + m_EK[8*i+5] = K[(i+3) % 8 + 8]; + m_EK[8*i+6] = rotl<13>(K[(i+6) % 8]); + m_EK[8*i+7] = K[(i+7) % 8 + 8]; + } + } + +void KASUMI::clear() + { + zap(m_EK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/kasumi/kasumi.h b/comm/third_party/botan/src/lib/block/kasumi/kasumi.h new file mode 100644 index 0000000000..9ea09a1036 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/kasumi/kasumi.h @@ -0,0 +1,37 @@ +/* +* KASUMI +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_KASUMI_H_ +#define BOTAN_KASUMI_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(kasumi.h) + +namespace Botan { + +/** +* KASUMI, the block cipher used in 3G telephony +* +* A single key schedule (m_EK) serves both encryption and decryption. +*/ +class BOTAN_PUBLIC_API(2,0) KASUMI final : public Block_Cipher_Fixed_Params<8, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "KASUMI"; } + BlockCipher* clone() const override { 
return new KASUMI; } + private: + void key_schedule(const uint8_t[], size_t) override; + + /* subkeys; key_schedule resizes this to 64 words */ secure_vector<uint16_t> m_EK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/lion/info.txt b/comm/third_party/botan/src/lib/block/lion/info.txt new file mode 100644 index 0000000000..a7b93e92ee --- /dev/null +++ b/comm/third_party/botan/src/lib/block/lion/info.txt @@ -0,0 +1,8 @@ +<defines> +LION -> 20131128 +</defines> + +<requires> +stream +hash +</requires> diff --git a/comm/third_party/botan/src/lib/block/lion/lion.cpp b/comm/third_party/botan/src/lib/block/lion/lion.cpp new file mode 100644 index 0000000000..c9589a46ac --- /dev/null +++ b/comm/third_party/botan/src/lib/block/lion/lion.cpp @@ -0,0 +1,138 @@ +/* +* Lion +* (C) 1999-2007,2014 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/lion.h> +#include <botan/exceptn.h> + +namespace Botan { + +/* +* Lion Encryption +* +* Per block, with the block split into a left part of left_size() bytes +* and a right part of right_size() bytes: +* 1. key the stream cipher with (left, m_key1) combined by xor_buf and +* run it over the right part; +* 2. hash the new right part and combine the digest into the left part; +* 3. key the stream cipher with (new left, m_key2) combined by xor_buf +* and run it over the right part again, in place. +*/ +void Lion::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_key1.empty() == false); + + const size_t LEFT_SIZE = left_size(); + const size_t RIGHT_SIZE = right_size(); + + secure_vector<uint8_t> buffer_vec(LEFT_SIZE); + uint8_t* buffer = buffer_vec.data(); + + for(size_t i = 0; i != blocks; ++i) + { + xor_buf(buffer, in, m_key1.data(), LEFT_SIZE); + m_cipher->set_key(buffer, LEFT_SIZE); + m_cipher->cipher(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); + + m_hash->update(out + LEFT_SIZE, RIGHT_SIZE); + m_hash->final(buffer); + xor_buf(out, in, buffer, LEFT_SIZE); + + xor_buf(buffer, out, m_key2.data(), LEFT_SIZE); + m_cipher->set_key(buffer, LEFT_SIZE); + m_cipher->cipher1(out + LEFT_SIZE, RIGHT_SIZE); + + in += m_block_size; + out += m_block_size; + } + } + +/* +* Lion Decryption +* +* The same three steps as encryption, with m_key2 used in the first +* step and m_key1 in the last. +*/ +void Lion::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_key1.empty() == false); + + const size_t LEFT_SIZE = left_size(); + const size_t RIGHT_SIZE = 
right_size(); + + secure_vector<uint8_t> buffer_vec(LEFT_SIZE); + uint8_t* buffer = buffer_vec.data(); + + for(size_t i = 0; i != blocks; ++i) + { + xor_buf(buffer, in, m_key2.data(), LEFT_SIZE); + m_cipher->set_key(buffer, LEFT_SIZE); + m_cipher->cipher(in + LEFT_SIZE, out + LEFT_SIZE, RIGHT_SIZE); + + m_hash->update(out + LEFT_SIZE, RIGHT_SIZE); + m_hash->final(buffer); + xor_buf(out, in, buffer, LEFT_SIZE); + + xor_buf(buffer, out, m_key1.data(), LEFT_SIZE); + m_cipher->set_key(buffer, LEFT_SIZE); + m_cipher->cipher1(out + LEFT_SIZE, RIGHT_SIZE); + + in += m_block_size; + out += m_block_size; + } + } + +/* +* Lion Key Schedule +* +* The key is split in half. Each half is copied to the front of its own +* left_size()-byte buffer (m_key1, m_key2) after that buffer has been +* cleared with clear_mem. +*/ +void Lion::key_schedule(const uint8_t key[], size_t length) + { + clear(); + + const size_t half = length / 2; + + m_key1.resize(left_size()); + m_key2.resize(left_size()); + clear_mem(m_key1.data(), m_key1.size()); + clear_mem(m_key2.data(), m_key2.size()); + copy_mem(m_key1.data(), key, half); + copy_mem(m_key2.data(), key + half, half); + } + +/* +* Return the name of this type +*/ +std::string Lion::name() const + { + return "Lion(" + m_hash->name() + "," + + m_cipher->name() + "," + + std::to_string(block_size()) + ")"; + } + +/* +* Return a clone of this object +*/ +BlockCipher* Lion::clone() const + { + return new Lion(m_hash->clone(), m_cipher->clone(), block_size()); + } + +/* +* Clear memory of sensitive data +*/ +void Lion::clear() + { + zap(m_key1); + zap(m_key2); + m_hash->clear(); + m_cipher->clear(); + } + +/* +* Lion Constructor +* +* hash and cipher are stored in the m_hash and m_cipher members. The +* block size is raised to at least 2*hash->output_length() + 1. +* NOTE(review): because of that std::max the first check below looks +* unreachable - confirm before relying on it. +*/ +Lion::Lion(HashFunction* hash, StreamCipher* cipher, size_t bs) : + m_block_size(std::max<size_t>(2*hash->output_length() + 1, bs)), + m_hash(hash), + m_cipher(cipher) + { + if(2*left_size() + 1 > m_block_size) + throw Invalid_Argument(name() + ": Chosen block size is too small"); + + if(!m_cipher->valid_keylength(left_size())) + throw Invalid_Argument(name() + ": This stream/hash combo is invalid"); + } + +} diff --git a/comm/third_party/botan/src/lib/block/lion/lion.h 
b/comm/third_party/botan/src/lib/block/lion/lion.h new file mode 100644 index 0000000000..fa8e9f4145 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/lion/lion.h @@ -0,0 +1,66 @@ +/* +* Lion +* (C) 1999-2007,2014 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_LION_H_ +#define BOTAN_LION_H_ + +#include <botan/block_cipher.h> +#include <botan/stream_cipher.h> +#include <botan/hash.h> + +BOTAN_FUTURE_INTERNAL_HEADER(lion.h) + +namespace Botan { + +/** +* Lion is a block cipher construction designed by Ross Anderson and +* Eli Biham, described in "Two Practical and Provably Secure Block +* Ciphers: BEAR and LION". It has a variable block size and is +* designed to encrypt very large blocks (up to a megabyte). +* +* https://www.cl.cam.ac.uk/~rja14/Papers/bear-lion.pdf +*/ +class BOTAN_PUBLIC_API(2,0) Lion final : public BlockCipher + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + size_t block_size() const override { return m_block_size; } + + /* arguments are 2, twice the hash output length, and 2 - presumably (minimum, maximum, modulo); TODO confirm against Key_Length_Specification */ Key_Length_Specification key_spec() const override + { + return Key_Length_Specification(2, 2*m_hash->output_length(), 2); + } + + void clear() override; + std::string name() const override; + BlockCipher* clone() const override; + + /** + * The hash and cipher pointers end up in the unique_ptr members + * m_hash and m_cipher declared below. + * + * @param hash the hash to use internally + * @param cipher the stream cipher to use internally + * @param block_size the size of the block to use; the constructor in + * lion.cpp raises values below 2*hash->output_length() + 1 to that + * minimum + */ + Lion(HashFunction* hash, + StreamCipher* cipher, + size_t block_size); + private: + void key_schedule(const uint8_t[], size_t) override; + + /* the left part of a block is one hash output long */ size_t left_size() const { return m_hash->output_length(); } + /* the right part is the rest of the block */ size_t right_size() const { return m_block_size - left_size(); } + + const size_t m_block_size; + std::unique_ptr<HashFunction> m_hash; + std::unique_ptr<StreamCipher> m_cipher; + secure_vector<uint8_t> m_key1, m_key2; + }; + +} + +#endif diff --git 
a/comm/third_party/botan/src/lib/block/misty1/info.txt b/comm/third_party/botan/src/lib/block/misty1/info.txt new file mode 100644 index 0000000000..bf21dd4390 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/misty1/info.txt @@ -0,0 +1,3 @@ +<defines> +MISTY1 -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/misty1/misty1.cpp b/comm/third_party/botan/src/lib/block/misty1/misty1.cpp new file mode 100644 index 0000000000..ba5b06abec --- /dev/null +++ b/comm/third_party/botan/src/lib/block/misty1/misty1.cpp @@ -0,0 +1,263 @@ +/* +* MISTY1 +* (C) 1999-2009 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/misty1.h> +#include <botan/loadstor.h> + +namespace Botan { + +namespace { + +alignas(64) static const uint8_t MISTY1_SBOX_S7[128] = { + 0x1B, 0x32, 0x33, 0x5A, 0x3B, 0x10, 0x17, 0x54, 0x5B, 0x1A, 0x72, 0x73, + 0x6B, 0x2C, 0x66, 0x49, 0x1F, 0x24, 0x13, 0x6C, 0x37, 0x2E, 0x3F, 0x4A, + 0x5D, 0x0F, 0x40, 0x56, 0x25, 0x51, 0x1C, 0x04, 0x0B, 0x46, 0x20, 0x0D, + 0x7B, 0x35, 0x44, 0x42, 0x2B, 0x1E, 0x41, 0x14, 0x4B, 0x79, 0x15, 0x6F, + 0x0E, 0x55, 0x09, 0x36, 0x74, 0x0C, 0x67, 0x53, 0x28, 0x0A, 0x7E, 0x38, + 0x02, 0x07, 0x60, 0x29, 0x19, 0x12, 0x65, 0x2F, 0x30, 0x39, 0x08, 0x68, + 0x5F, 0x78, 0x2A, 0x4C, 0x64, 0x45, 0x75, 0x3D, 0x59, 0x48, 0x03, 0x57, + 0x7C, 0x4F, 0x62, 0x3C, 0x1D, 0x21, 0x5E, 0x27, 0x6A, 0x70, 0x4D, 0x3A, + 0x01, 0x6D, 0x6E, 0x63, 0x18, 0x77, 0x23, 0x05, 0x26, 0x76, 0x00, 0x31, + 0x2D, 0x7A, 0x7F, 0x61, 0x50, 0x22, 0x11, 0x06, 0x47, 0x16, 0x52, 0x4E, + 0x71, 0x3E, 0x69, 0x43, 0x34, 0x5C, 0x58, 0x7D }; + +alignas(64) static const uint16_t MISTY1_SBOX_S9[512] = { + 0x01C3, 0x00CB, 0x0153, 0x019F, 0x01E3, 0x00E9, 0x00FB, 0x0035, 0x0181, + 0x00B9, 0x0117, 0x01EB, 0x0133, 0x0009, 0x002D, 0x00D3, 0x00C7, 0x014A, + 0x0037, 0x007E, 0x00EB, 0x0164, 0x0193, 0x01D8, 0x00A3, 0x011E, 0x0055, + 0x002C, 0x001D, 0x01A2, 0x0163, 0x0118, 0x014B, 0x0152, 0x01D2, 0x000F, + 0x002B, 
0x0030, 0x013A, 0x00E5, 0x0111, 0x0138, 0x018E, 0x0063, 0x00E3, + 0x00C8, 0x01F4, 0x001B, 0x0001, 0x009D, 0x00F8, 0x01A0, 0x016D, 0x01F3, + 0x001C, 0x0146, 0x007D, 0x00D1, 0x0082, 0x01EA, 0x0183, 0x012D, 0x00F4, + 0x019E, 0x01D3, 0x00DD, 0x01E2, 0x0128, 0x01E0, 0x00EC, 0x0059, 0x0091, + 0x0011, 0x012F, 0x0026, 0x00DC, 0x00B0, 0x018C, 0x010F, 0x01F7, 0x00E7, + 0x016C, 0x00B6, 0x00F9, 0x00D8, 0x0151, 0x0101, 0x014C, 0x0103, 0x00B8, + 0x0154, 0x012B, 0x01AE, 0x0017, 0x0071, 0x000C, 0x0047, 0x0058, 0x007F, + 0x01A4, 0x0134, 0x0129, 0x0084, 0x015D, 0x019D, 0x01B2, 0x01A3, 0x0048, + 0x007C, 0x0051, 0x01CA, 0x0023, 0x013D, 0x01A7, 0x0165, 0x003B, 0x0042, + 0x00DA, 0x0192, 0x00CE, 0x00C1, 0x006B, 0x009F, 0x01F1, 0x012C, 0x0184, + 0x00FA, 0x0196, 0x01E1, 0x0169, 0x017D, 0x0031, 0x0180, 0x010A, 0x0094, + 0x01DA, 0x0186, 0x013E, 0x011C, 0x0060, 0x0175, 0x01CF, 0x0067, 0x0119, + 0x0065, 0x0068, 0x0099, 0x0150, 0x0008, 0x0007, 0x017C, 0x00B7, 0x0024, + 0x0019, 0x00DE, 0x0127, 0x00DB, 0x00E4, 0x01A9, 0x0052, 0x0109, 0x0090, + 0x019C, 0x01C1, 0x0028, 0x01B3, 0x0135, 0x016A, 0x0176, 0x00DF, 0x01E5, + 0x0188, 0x00C5, 0x016E, 0x01DE, 0x01B1, 0x00C3, 0x01DF, 0x0036, 0x00EE, + 0x01EE, 0x00F0, 0x0093, 0x0049, 0x009A, 0x01B6, 0x0069, 0x0081, 0x0125, + 0x000B, 0x005E, 0x00B4, 0x0149, 0x01C7, 0x0174, 0x003E, 0x013B, 0x01B7, + 0x008E, 0x01C6, 0x00AE, 0x0010, 0x0095, 0x01EF, 0x004E, 0x00F2, 0x01FD, + 0x0085, 0x00FD, 0x00F6, 0x00A0, 0x016F, 0x0083, 0x008A, 0x0156, 0x009B, + 0x013C, 0x0107, 0x0167, 0x0098, 0x01D0, 0x01E9, 0x0003, 0x01FE, 0x00BD, + 0x0122, 0x0089, 0x00D2, 0x018F, 0x0012, 0x0033, 0x006A, 0x0142, 0x00ED, + 0x0170, 0x011B, 0x00E2, 0x014F, 0x0158, 0x0131, 0x0147, 0x005D, 0x0113, + 0x01CD, 0x0079, 0x0161, 0x01A5, 0x0179, 0x009E, 0x01B4, 0x00CC, 0x0022, + 0x0132, 0x001A, 0x00E8, 0x0004, 0x0187, 0x01ED, 0x0197, 0x0039, 0x01BF, + 0x01D7, 0x0027, 0x018B, 0x00C6, 0x009C, 0x00D0, 0x014E, 0x006C, 0x0034, + 0x01F2, 0x006E, 0x00CA, 0x0025, 0x00BA, 0x0191, 0x00FE, 0x0013, 0x0106, + 0x002F, 
0x01AD, 0x0172, 0x01DB, 0x00C0, 0x010B, 0x01D6, 0x00F5, 0x01EC, + 0x010D, 0x0076, 0x0114, 0x01AB, 0x0075, 0x010C, 0x01E4, 0x0159, 0x0054, + 0x011F, 0x004B, 0x00C4, 0x01BE, 0x00F7, 0x0029, 0x00A4, 0x000E, 0x01F0, + 0x0077, 0x004D, 0x017A, 0x0086, 0x008B, 0x00B3, 0x0171, 0x00BF, 0x010E, + 0x0104, 0x0097, 0x015B, 0x0160, 0x0168, 0x00D7, 0x00BB, 0x0066, 0x01CE, + 0x00FC, 0x0092, 0x01C5, 0x006F, 0x0016, 0x004A, 0x00A1, 0x0139, 0x00AF, + 0x00F1, 0x0190, 0x000A, 0x01AA, 0x0143, 0x017B, 0x0056, 0x018D, 0x0166, + 0x00D4, 0x01FB, 0x014D, 0x0194, 0x019A, 0x0087, 0x01F8, 0x0123, 0x00A7, + 0x01B8, 0x0141, 0x003C, 0x01F9, 0x0140, 0x002A, 0x0155, 0x011A, 0x01A1, + 0x0198, 0x00D5, 0x0126, 0x01AF, 0x0061, 0x012E, 0x0157, 0x01DC, 0x0072, + 0x018A, 0x00AA, 0x0096, 0x0115, 0x00EF, 0x0045, 0x007B, 0x008D, 0x0145, + 0x0053, 0x005F, 0x0178, 0x00B2, 0x002E, 0x0020, 0x01D5, 0x003F, 0x01C9, + 0x01E7, 0x01AC, 0x0044, 0x0038, 0x0014, 0x00B1, 0x016B, 0x00AB, 0x00B5, + 0x005A, 0x0182, 0x01C8, 0x01D4, 0x0018, 0x0177, 0x0064, 0x00CF, 0x006D, + 0x0100, 0x0199, 0x0130, 0x015A, 0x0005, 0x0120, 0x01BB, 0x01BD, 0x00E0, + 0x004F, 0x00D6, 0x013F, 0x01C4, 0x012A, 0x0015, 0x0006, 0x00FF, 0x019B, + 0x00A6, 0x0043, 0x0088, 0x0050, 0x015F, 0x01E8, 0x0121, 0x0073, 0x017E, + 0x00BC, 0x00C2, 0x00C9, 0x0173, 0x0189, 0x01F5, 0x0074, 0x01CC, 0x01E6, + 0x01A8, 0x0195, 0x001F, 0x0041, 0x000D, 0x01BA, 0x0032, 0x003D, 0x01D1, + 0x0080, 0x00A8, 0x0057, 0x01B9, 0x0162, 0x0148, 0x00D9, 0x0105, 0x0062, + 0x007A, 0x0021, 0x01FF, 0x0112, 0x0108, 0x01C0, 0x00A9, 0x011D, 0x01B0, + 0x01A6, 0x00CD, 0x00F3, 0x005C, 0x0102, 0x005B, 0x01D9, 0x0144, 0x01F6, + 0x00AD, 0x00A5, 0x003A, 0x01CB, 0x0136, 0x017F, 0x0046, 0x00E1, 0x001E, + 0x01DD, 0x00E6, 0x0137, 0x01FA, 0x0185, 0x008C, 0x008F, 0x0040, 0x01B5, + 0x00BE, 0x0078, 0x0000, 0x00AC, 0x0110, 0x015E, 0x0124, 0x0002, 0x01BC, + 0x00A2, 0x00EA, 0x0070, 0x01FC, 0x0116, 0x015C, 0x004C, 0x01C2 }; + +/* +* MISTY1 FI Function +*/ +uint16_t FI(uint16_t input, uint16_t key7, uint16_t key9) 
+ { + uint16_t D9 = input >> 7, D7 = input & 0x7F; /* split input into its upper 9 bits (D9) and lower 7 bits (D7) */ + D9 = MISTY1_SBOX_S9[D9] ^ D7; + D7 = (MISTY1_SBOX_S7[D7] ^ key7 ^ D9) & 0x7F; + D9 = MISTY1_SBOX_S9[D9 ^ key9] ^ D7; + return static_cast<uint16_t>(D7 << 9) | D9; + } + +} + +/* +* MISTY1 Encryption +* +* Encrypts `blocks` consecutive BLOCK_SIZE-byte blocks from in[] to out[] +* using the m_EK subkeys. Each of the four outer-loop passes consumes 24 +* subkeys (RK[0..23], i.e. m_EK[0..95] overall); the closing mixing step +* uses m_EK[96..99]. The two halves are swapped on output (B2, B3, B0, B1). +*/ +void MISTY1::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint16_t B0 = load_be<uint16_t>(in, 0); + uint16_t B1 = load_be<uint16_t>(in, 1); + uint16_t B2 = load_be<uint16_t>(in, 2); + uint16_t B3 = load_be<uint16_t>(in, 3); + + for(size_t j = 0; j != 12; j += 3) + { + const uint16_t* RK = &m_EK[8 * j]; + + B1 ^= B0 & RK[0]; + B0 ^= B1 | RK[1]; + B3 ^= B2 & RK[2]; + B2 ^= B3 | RK[3]; + + uint16_t T0, T1; + + T0 = FI(B0 ^ RK[ 4], RK[ 5], RK[ 6]) ^ B1; + T1 = FI(B1 ^ RK[ 7], RK[ 8], RK[ 9]) ^ T0; + T0 = FI(T0 ^ RK[10], RK[11], RK[12]) ^ T1; + + B2 ^= T1 ^ RK[13]; + B3 ^= T0; + + T0 = FI(B2 ^ RK[14], RK[15], RK[16]) ^ B3; + T1 = FI(B3 ^ RK[17], RK[18], RK[19]) ^ T0; + T0 = FI(T0 ^ RK[20], RK[21], RK[22]) ^ T1; + + B0 ^= T1 ^ RK[23]; + B1 ^= T0; + } + + B1 ^= B0 & m_EK[96]; + B0 ^= B1 | m_EK[97]; + B3 ^= B2 & m_EK[98]; + B2 ^= B3 | m_EK[99]; + + store_be(out, B2, B3, B0, B1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* MISTY1 Decryption +* +* Same loop shape as encrypt_n but driven by the m_DK subkeys. The input +* words are loaded with the two halves swapped (words 2,3 into B0,B1 and +* words 0,1 into B2,B3) and the result is stored as B0, B1, B2, B3. +*/ +void MISTY1::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint16_t B0 = load_be<uint16_t>(in, 2); + uint16_t B1 = load_be<uint16_t>(in, 3); + uint16_t B2 = load_be<uint16_t>(in, 0); + uint16_t B3 = load_be<uint16_t>(in, 1); + + for(size_t j = 0; j != 12; j += 3) + { + const uint16_t* RK = &m_DK[8 * j]; + + B2 ^= B3 | RK[0]; + B3 ^= B2 & RK[1]; + B0 ^= B1 | RK[2]; + B1 ^= B0 & RK[3]; + + uint16_t T0, T1; + + T0 = FI(B2 ^ RK[ 4], RK[ 5], RK[ 6]) ^ B3; + T1 = FI(B3 ^ RK[ 7], RK[ 8], RK[ 9]) ^ T0; + T0 = FI(T0 ^ RK[10], RK[11], RK[12]) ^ T1; + + B0
^= T1 ^ RK[13]; + B1 ^= T0; + + T0 = FI(B0 ^ RK[14], RK[15], RK[16]) ^ B1; + T1 = FI(B1 ^ RK[17], RK[18], RK[19]) ^ T0; + T0 = FI(T0 ^ RK[20], RK[21], RK[22]) ^ T1; + + B2 ^= T1 ^ RK[23]; + B3 ^= T0; + } + + B2 ^= B3 | m_DK[96]; + B3 ^= B2 & m_DK[97]; + B0 ^= B1 | m_DK[98]; + B1 ^= B0 & m_DK[99]; + + store_be(out, B0, B1, B2, B3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* MISTY1 Key Schedule +* +* KS[0 .. length/2-1] receives the key as big-endian 16-bit words. For each +* i in 0..7, KS[i+8] is FI applied to KS[i], keyed by the two parts of the +* next key word (index wraps modulo 8); KS[i+16] and KS[i+24] are the upper +* 7 bits and lower 9 bits of KS[i+8]. m_EK and m_DK (100 entries each) are +* then filled by indexing KS through EK_ORDER and DK_ORDER. +*/ +void MISTY1::key_schedule(const uint8_t key[], size_t length) + { + secure_vector<uint16_t> KS(32); + for(size_t i = 0; i != length / 2; ++i) + KS[i] = load_be<uint16_t>(key, i); + + for(size_t i = 0; i != 8; ++i) + { + KS[i+ 8] = FI(KS[i], KS[(i+1) % 8] >> 9, KS[(i+1) % 8] & 0x1FF); + KS[i+16] = KS[i+8] >> 9; + KS[i+24] = KS[i+8] & 0x1FF; + } + + /* + * Precomputed indexes for the orderings of the subkeys (MISTY1 reuses + * values). Every entry is an index into the 32-word KS array above. + */ + static const uint8_t EK_ORDER[100] = { + 0x00, 0x0E, 0x0A, 0x04, 0x00, 0x15, 0x1D, 0x02, 0x11, 0x19, 0x07, 0x13, + 0x1B, 0x04, 0x01, 0x16, 0x1E, 0x03, 0x12, 0x1A, 0x00, 0x14, 0x1C, 0x05, + 0x01, 0x0F, 0x0B, 0x05, 0x02, 0x17, 0x1F, 0x04, 0x13, 0x1B, 0x01, 0x15, + 0x1D, 0x06, 0x03, 0x10, 0x18, 0x05, 0x14, 0x1C, 0x02, 0x16, 0x1E, 0x07, + 0x02, 0x08, 0x0C, 0x06, 0x04, 0x11, 0x19, 0x06, 0x15, 0x1D, 0x03, 0x17, + 0x1F, 0x00, 0x05, 0x12, 0x1A, 0x07, 0x16, 0x1E, 0x04, 0x10, 0x18, 0x01, + 0x03, 0x09, 0x0D, 0x07, 0x06, 0x13, 0x1B, 0x00, 0x17, 0x1F, 0x05, 0x11, + 0x19, 0x02, 0x07, 0x14, 0x1C, 0x01, 0x10, 0x18, 0x06, 0x12, 0x1A, 0x03, + 0x04, 0x0A, 0x0E, 0x00 }; + + static const uint8_t DK_ORDER[100] = { + 0x00, 0x0E, 0x0A, 0x04, 0x07, 0x14, 0x1C, 0x01, 0x10, 0x18, 0x06, 0x12, + 0x1A, 0x03, 0x06, 0x13, 0x1B, 0x00, 0x17, 0x1F, 0x05, 0x11, 0x19, 0x02, + 0x07, 0x0D, 0x09, 0x03, 0x05, 0x12, 0x1A, 0x07, 0x16, 0x1E, 0x04, 0x10, + 0x18, 0x01, 0x04, 0x11, 0x19, 0x06, 0x15, 0x1D, 0x03, 0x17, 0x1F, 0x00, + 0x06, 0x0C, 0x08, 0x02, 0x03, 0x10, 0x18, 0x05, 0x14, 0x1C, 0x02, 0x16, + 0x1E, 0x07, 0x02, 0x17, 0x1F, 0x04, 0x13, 0x1B, 0x01, 0x15,
0x1D, 0x06, + 0x05, 0x0B, 0x0F, 0x01, 0x01, 0x16, 0x1E, 0x03, 0x12, 0x1A, 0x00, 0x14, + 0x1C, 0x05, 0x00, 0x15, 0x1D, 0x02, 0x11, 0x19, 0x07, 0x13, 0x1B, 0x04, + 0x04, 0x0A, 0x0E, 0x00 }; + /* Expand the 32 KS words into the 100-entry encryption and decryption subkey arrays */ + m_EK.resize(100); + m_DK.resize(100); + + for(size_t i = 0; i != 100; ++i) + { + m_EK[i] = KS[EK_ORDER[i]]; + m_DK[i] = KS[DK_ORDER[i]]; + } + } + /* Applies zap() to both subkey vectors (presumably wiping them and leaving them empty, so verify_key_set fails until a new key is set; confirm against zap's definition) */ +void MISTY1::clear() + { + zap(m_EK); + zap(m_DK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/misty1/misty1.h b/comm/third_party/botan/src/lib/block/misty1/misty1.h new file mode 100644 index 0000000000..b6c4abbd53 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/misty1/misty1.h @@ -0,0 +1,37 @@ +/* +* MISTY1 +* (C) 1999-2008 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_MISTY1_H_ +#define BOTAN_MISTY1_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(misty1.h) + +namespace Botan { + +/** +* MISTY1 with 8 rounds +* +* Holds separate encryption (m_EK) and decryption (m_DK) subkey vectors, +* both filled by key_schedule(). +*/ +class BOTAN_PUBLIC_API(2,0) MISTY1 final : public Block_Cipher_Fixed_Params<8, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "MISTY1"; } + BlockCipher* clone() const override { return new MISTY1; } + private: + void key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint16_t> m_EK, m_DK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/noekeon/info.txt b/comm/third_party/botan/src/lib/block/noekeon/info.txt new file mode 100644 index 0000000000..f3445eb7e6 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/noekeon/info.txt @@ -0,0 +1,3 @@ +<defines> +NOEKEON -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/noekeon/noekeon.cpp b/comm/third_party/botan/src/lib/block/noekeon/noekeon.cpp new file mode 100644 index 0000000000..0a820c7014 ---
/dev/null +++ b/comm/third_party/botan/src/lib/block/noekeon/noekeon.cpp @@ -0,0 +1,267 @@ +/* +* Noekeon +* (C) 1999-2008 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/noekeon.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/cpuid.h> + +namespace Botan { + +namespace { + +/* +* Noekeon's Theta Operation +* +* Mixes T = A0 ^ A2 (XORed with its own 8-bit left and right rotations) +* into A1 and A3, XORs the four key words EK[0..3] into the state, then +* mixes T = A1 ^ A3 into A0 and A2 the same way. All four words are +* updated in place. +*/ +inline void theta(uint32_t& A0, uint32_t& A1, + uint32_t& A2, uint32_t& A3, + const uint32_t EK[4]) + { + uint32_t T = A0 ^ A2; + T ^= rotl<8>(T) ^ rotr<8>(T); + A1 ^= T; + A3 ^= T; + + A0 ^= EK[0]; + A1 ^= EK[1]; + A2 ^= EK[2]; + A3 ^= EK[3]; + + T = A1 ^ A3; + T ^= rotl<8>(T) ^ rotr<8>(T); + A0 ^= T; + A2 ^= T; + } + +/* +* Theta With Null Key +* +* Same two mixing steps as the keyed overload above, without the EK XOR +* in between. This is the overload key_schedule() calls. +*/ +inline void theta(uint32_t& A0, uint32_t& A1, + uint32_t& A2, uint32_t& A3) + { + uint32_t T = A0 ^ A2; + T ^= rotl<8>(T) ^ rotr<8>(T); + A1 ^= T; + A3 ^= T; + + T = A1 ^ A3; + T ^= rotl<8>(T) ^ rotr<8>(T); + A0 ^= T; + A2 ^= T; + } + +/* +* Noekeon's Gamma S-Box Layer +* +* Purely bitwise: the same two nonlinear statements run before and after +* a middle step that swaps A0 with A3 and folds A0, A1, A3 into A2. +*/ +inline void gamma(uint32_t& A0, uint32_t& A1, uint32_t& A2, uint32_t& A3) + { + A1 ^= ~A3 & ~A2; + A0 ^= A2 & A1; + + uint32_t T = A3; + A3 = A0; + A0 = T; + + A2 ^= A0 ^ A1 ^ A3; + + A1 ^= ~A3 & ~A2; + A0 ^= A2 & A1; + } + +} + /* Returns 4 when BOTAN_HAS_NOEKEON_SIMD is defined and CPUID::has_simd_32() is true, otherwise 1 */ +size_t Noekeon::parallelism() const + { +#if defined(BOTAN_HAS_NOEKEON_SIMD) + if(CPUID::has_simd_32()) + { + return 4; + } +#endif + + return 1; + } + /* Returns "simd" under the same condition that parallelism() returns 4, otherwise "base" */ +std::string Noekeon::provider() const + { +#if defined(BOTAN_HAS_NOEKEON_SIMD) + if(CPUID::has_simd_32()) + { + return "simd"; + } +#endif + + return "base"; + } + +/* +* Noekeon Round Constants +* +* 17 entries: RC[0..15] are used inside the 16-iteration round loops and +* RC[16] by the step outside the loop. +*/ +const uint8_t Noekeon::RC[] = { + 0x80, 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, + 0x2F, 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, + 0xD4 }; + +/* +* Noekeon Encryption +* +* When the SIMD path is compiled in and available, full groups of 4 blocks +* go through simd_encrypt_4; the remaining blocks use the scalar loop. +*/ +void Noekeon::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + +#if defined(BOTAN_HAS_NOEKEON_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >=
4) + { + simd_encrypt_4(in, out); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A0 = load_be<uint32_t>(in, 0); + uint32_t A1 = load_be<uint32_t>(in, 1); + uint32_t A2 = load_be<uint32_t>(in, 2); + uint32_t A3 = load_be<uint32_t>(in, 3); + + for(size_t j = 0; j != 16; ++j) + { + A0 ^= RC[j]; + theta(A0, A1, A2, A3, m_EK.data()); + + A1 = rotl<1>(A1); + A2 = rotl<5>(A2); + A3 = rotl<2>(A3); + + gamma(A0, A1, A2, A3); + + A1 = rotr<1>(A1); + A2 = rotr<5>(A2); + A3 = rotr<2>(A3); + } + + A0 ^= RC[16]; + theta(A0, A1, A2, A3, m_EK.data()); + + store_be(out, A0, A1, A2, A3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* Noekeon Decryption +* +* Counterpart of encrypt_n: uses m_DK, applies theta before the round +* constant XOR, and walks RC from index 16 down to 0. When the SIMD path +* is compiled in and available, full groups of 4 blocks go through +* simd_decrypt_4; the remaining blocks use the scalar loop. +*/ +void Noekeon::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_DK.empty() == false); + +#if defined(BOTAN_HAS_NOEKEON_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_decrypt_4(in, out); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A0 = load_be<uint32_t>(in, 0); + uint32_t A1 = load_be<uint32_t>(in, 1); + uint32_t A2 = load_be<uint32_t>(in, 2); + uint32_t A3 = load_be<uint32_t>(in, 3); + + for(size_t j = 16; j != 0; --j) + { + theta(A0, A1, A2, A3, m_DK.data()); + A0 ^= RC[j]; + + A1 = rotl<1>(A1); + A2 = rotl<5>(A2); + A3 = rotl<2>(A3); + + gamma(A0, A1, A2, A3); + + A1 = rotr<1>(A1); + A2 = rotr<5>(A2); + A3 = rotr<2>(A3); + } + + theta(A0, A1, A2, A3, m_DK.data()); + A0 ^= RC[0]; + + store_be(out, A0, A1, A2, A3); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* Noekeon Key Schedule +* +* Loads the key as four big-endian 32-bit words and runs the round loop +* over them using the null-key theta. The state after the final RC[16] +* XOR is stored as m_DK; after one more null-key theta it is stored as +* m_EK. The length argument is not used. +*/ +void Noekeon::key_schedule(const uint8_t key[], size_t) + { + uint32_t A0 = load_be<uint32_t>(key, 0); + uint32_t A1 = load_be<uint32_t>(key, 1); + uint32_t A2 = load_be<uint32_t>(key, 2); + uint32_t A3 = load_be<uint32_t>(key, 3); + + for(size_t i = 0; i != 16; ++i) + { + A0 ^= RC[i];
+ theta(A0, A1, A2, A3); + + A1 = rotl<1>(A1); + A2 = rotl<5>(A2); + A3 = rotl<2>(A3); + + gamma(A0, A1, A2, A3); + + A1 = rotr<1>(A1); + A2 = rotr<5>(A2); + A3 = rotr<2>(A3); + } + + A0 ^= RC[16]; + /* decryption key: the state before the final null-key theta */ + m_DK.resize(4); + m_DK[0] = A0; + m_DK[1] = A1; + m_DK[2] = A2; + m_DK[3] = A3; + + theta(A0, A1, A2, A3); + /* encryption key: the same state after that theta */ + m_EK.resize(4); + m_EK[0] = A0; + m_EK[1] = A1; + m_EK[2] = A2; + m_EK[3] = A3; + } + +/* +* Clear memory of sensitive data +* (zap() is applied to both key vectors, m_EK and m_DK) +*/ +void Noekeon::clear() + { + zap(m_EK); + zap(m_DK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/noekeon/noekeon.h b/comm/third_party/botan/src/lib/block/noekeon/noekeon.h new file mode 100644 index 0000000000..9e7e4a5ff6 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/noekeon/noekeon.h @@ -0,0 +1,49 @@ +/* +* Noekeon +* (C) 1999-2008 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_NOEKEON_H_ +#define BOTAN_NOEKEON_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(noekeon.h) + +namespace Botan { + +/** +* Noekeon +* +* Keeps two key vectors, m_EK and m_DK, both set by key_schedule(). The +* 4-block SIMD helpers are declared only when BOTAN_HAS_NOEKEON_SIMD is +* defined. +*/ +class BOTAN_PUBLIC_API(2,0) Noekeon final : public Block_Cipher_Fixed_Params<16, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + std::string provider() const override; + void clear() override; + std::string name() const override { return "Noekeon"; } + BlockCipher* clone() const override { return new Noekeon; } + size_t parallelism() const override; + + private: +#if defined(BOTAN_HAS_NOEKEON_SIMD) + void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const; + void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const; +#endif + + /** + * The Noekeon round constants + */ + static const uint8_t RC[17]; + + void key_schedule(const uint8_t[], size_t) override; + secure_vector<uint32_t> m_EK, m_DK; + }; + +} + +#endif diff --git
a/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/info.txt b/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/info.txt new file mode 100644 index 0000000000..a09e491b5c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/info.txt @@ -0,0 +1,8 @@ +<defines> +NOEKEON_SIMD -> 20160903 +</defines> + +<requires> +noekeon +simd +</requires> diff --git a/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp b/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp new file mode 100644 index 0000000000..83467a054c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/noekeon/noekeon_simd/noekeon_simd.cpp @@ -0,0 +1,143 @@ +/* +* Noekeon in SIMD +* (C) 2010 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/noekeon.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +/* +* Noekeon's Theta Operation +* +* SIMD_4x32 version of the keyed theta: the same statement sequence, with +* K0..K3 supplying the key words. Wrapped in do/while(0) so that an +* invocation followed by a semicolon forms one statement. +*/ +#define NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3) \ + do { \ + SIMD_4x32 T = A0 ^ A2; \ + T ^= T.rotl<8>() ^ T.rotr<8>(); \ + A1 ^= T; \ + A3 ^= T; \ + \ + A0 ^= K0; \ + A1 ^= K1; \ + A2 ^= K2; \ + A3 ^= K3; \ + \ + T = A1 ^ A3; \ + T ^= T.rotl<8>() ^ T.rotr<8>(); \ + A0 ^= T; \ + A2 ^= T; \ + } while(0) + +/* +* Noekeon's Gamma S-Box Layer +* +* SIMD_4x32 version of gamma. Where the scalar code writes ~A3 & ~A2 this +* uses A3.andc(~A2) (presumably andc computes ~this & arg; confirm in +* simd_32.h). +*/ +#define NOK_SIMD_GAMMA(A0, A1, A2, A3) \ + do \ + { \ + A1 ^= A3.andc(~A2); \ + A0 ^= A2 & A1; \ + \ + SIMD_4x32 T = A3; \ + A3 = A0; \ + A0 = T; \ + \ + A2 ^= A0 ^ A1 ^ A3; \ + \ + A1 ^= A3.andc(~A2); \ + A0 ^= A2 & A1; \ + } while(0) + +/* +* Noekeon Encryption of 4 blocks +* +* Reads 64 bytes from in[] and writes 64 bytes to out[]. Each m_EK word is +* passed through SIMD_4x32::splat(); the state is transposed after loading +* and again before storing (presumably so that each register holds one +* state word from all four blocks; confirm in simd_32.h). +*/ +void Noekeon::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const + { + const SIMD_4x32 K0 = SIMD_4x32::splat(m_EK[0]); + const SIMD_4x32 K1 = SIMD_4x32::splat(m_EK[1]); + const SIMD_4x32 K2 = SIMD_4x32::splat(m_EK[2]); + const SIMD_4x32 K3 = SIMD_4x32::splat(m_EK[3]); + + SIMD_4x32 A0 = SIMD_4x32::load_be(in ); + SIMD_4x32 A1 = SIMD_4x32::load_be(in + 16); + SIMD_4x32 A2 = SIMD_4x32::load_be(in + 32); +
SIMD_4x32 A3 = SIMD_4x32::load_be(in + 48); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + for(size_t i = 0; i != 16; ++i) + { + A0 ^= SIMD_4x32::splat(RC[i]); + + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + A1 = A1.rotl<1>(); + A2 = A2.rotl<5>(); + A3 = A3.rotl<2>(); + + NOK_SIMD_GAMMA(A0, A1, A2, A3); + + A1 = A1.rotr<1>(); + A2 = A2.rotr<5>(); + A3 = A3.rotr<2>(); + } + + A0 ^= SIMD_4x32::splat(RC[16]); + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + A0.store_be(out); + A1.store_be(out + 16); + A2.store_be(out + 32); + A3.store_be(out + 48); + } + +/* +* Noekeon Decryption of 4 blocks +* +* Same layout as simd_encrypt_4 (64 bytes in, 64 bytes out, transpose +* after load and before store) but with the m_DK key words, theta applied +* before the round-constant XOR, and RC walked from index 16 down to 0. +*/ +void Noekeon::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const + { + const SIMD_4x32 K0 = SIMD_4x32::splat(m_DK[0]); + const SIMD_4x32 K1 = SIMD_4x32::splat(m_DK[1]); + const SIMD_4x32 K2 = SIMD_4x32::splat(m_DK[2]); + const SIMD_4x32 K3 = SIMD_4x32::splat(m_DK[3]); + + SIMD_4x32 A0 = SIMD_4x32::load_be(in ); + SIMD_4x32 A1 = SIMD_4x32::load_be(in + 16); + SIMD_4x32 A2 = SIMD_4x32::load_be(in + 32); + SIMD_4x32 A3 = SIMD_4x32::load_be(in + 48); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + for(size_t i = 0; i != 16; ++i) + { + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + + A0 ^= SIMD_4x32::splat(RC[16-i]); + + A1 = A1.rotl<1>(); + A2 = A2.rotl<5>(); + A3 = A3.rotl<2>(); + + NOK_SIMD_GAMMA(A0, A1, A2, A3); + + A1 = A1.rotr<1>(); + A2 = A2.rotr<5>(); + A3 = A3.rotr<2>(); + } + + NOK_SIMD_THETA(A0, A1, A2, A3, K0, K1, K2, K3); + A0 ^= SIMD_4x32::splat(RC[0]); + + SIMD_4x32::transpose(A0, A1, A2, A3); + + A0.store_be(out); + A1.store_be(out + 16); + A2.store_be(out + 32); + A3.store_be(out + 48); + } + +} diff --git a/comm/third_party/botan/src/lib/block/seed/info.txt b/comm/third_party/botan/src/lib/block/seed/info.txt new file mode 100644 index 0000000000..dd395eda1a --- /dev/null +++ b/comm/third_party/botan/src/lib/block/seed/info.txt @@ -0,0 +1,3 @@ +<defines> +SEED -> 20131128 +</defines> diff --git
a/comm/third_party/botan/src/lib/block/seed/seed.cpp b/comm/third_party/botan/src/lib/block/seed/seed.cpp new file mode 100644 index 0000000000..e596c70acb --- /dev/null +++ b/comm/third_party/botan/src/lib/block/seed/seed.cpp @@ -0,0 +1,328 @@ +/* +* SEED +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/seed.h> +#include <botan/loadstor.h> + +namespace Botan { + +namespace { + +alignas(64) const uint32_t SEED_S0[256] = { + 0x2989A1A8, 0x05858184, 0x16C6D2D4, 0x13C3D3D0, 0x14445054, 0x1D0D111C, + 0x2C8CA0AC, 0x25052124, 0x1D4D515C, 0x03434340, 0x18081018, 0x1E0E121C, + 0x11415150, 0x3CCCF0FC, 0x0ACAC2C8, 0x23436360, 0x28082028, 0x04444044, + 0x20002020, 0x1D8D919C, 0x20C0E0E0, 0x22C2E2E0, 0x08C8C0C8, 0x17071314, + 0x2585A1A4, 0x0F8F838C, 0x03030300, 0x3B4B7378, 0x3B8BB3B8, 0x13031310, + 0x12C2D2D0, 0x2ECEE2EC, 0x30407070, 0x0C8C808C, 0x3F0F333C, 0x2888A0A8, + 0x32023230, 0x1DCDD1DC, 0x36C6F2F4, 0x34447074, 0x2CCCE0EC, 0x15859194, + 0x0B0B0308, 0x17475354, 0x1C4C505C, 0x1B4B5358, 0x3D8DB1BC, 0x01010100, + 0x24042024, 0x1C0C101C, 0x33437370, 0x18889098, 0x10001010, 0x0CCCC0CC, + 0x32C2F2F0, 0x19C9D1D8, 0x2C0C202C, 0x27C7E3E4, 0x32427270, 0x03838380, + 0x1B8B9398, 0x11C1D1D0, 0x06868284, 0x09C9C1C8, 0x20406060, 0x10405050, + 0x2383A3A0, 0x2BCBE3E8, 0x0D0D010C, 0x3686B2B4, 0x1E8E929C, 0x0F4F434C, + 0x3787B3B4, 0x1A4A5258, 0x06C6C2C4, 0x38487078, 0x2686A2A4, 0x12021210, + 0x2F8FA3AC, 0x15C5D1D4, 0x21416160, 0x03C3C3C0, 0x3484B0B4, 0x01414140, + 0x12425250, 0x3D4D717C, 0x0D8D818C, 0x08080008, 0x1F0F131C, 0x19899198, + 0x00000000, 0x19091118, 0x04040004, 0x13435350, 0x37C7F3F4, 0x21C1E1E0, + 0x3DCDF1FC, 0x36467274, 0x2F0F232C, 0x27072324, 0x3080B0B0, 0x0B8B8388, + 0x0E0E020C, 0x2B8BA3A8, 0x2282A2A0, 0x2E4E626C, 0x13839390, 0x0D4D414C, + 0x29496168, 0x3C4C707C, 0x09090108, 0x0A0A0208, 0x3F8FB3BC, 0x2FCFE3EC, + 0x33C3F3F0, 0x05C5C1C4, 0x07878384, 0x14041014, 0x3ECEF2FC, 0x24446064, + 0x1ECED2DC, 
0x2E0E222C, 0x0B4B4348, 0x1A0A1218, 0x06060204, 0x21012120, + 0x2B4B6368, 0x26466264, 0x02020200, 0x35C5F1F4, 0x12829290, 0x0A8A8288, + 0x0C0C000C, 0x3383B3B0, 0x3E4E727C, 0x10C0D0D0, 0x3A4A7278, 0x07474344, + 0x16869294, 0x25C5E1E4, 0x26062224, 0x00808080, 0x2D8DA1AC, 0x1FCFD3DC, + 0x2181A1A0, 0x30003030, 0x37073334, 0x2E8EA2AC, 0x36063234, 0x15051114, + 0x22022220, 0x38083038, 0x34C4F0F4, 0x2787A3A4, 0x05454144, 0x0C4C404C, + 0x01818180, 0x29C9E1E8, 0x04848084, 0x17879394, 0x35053134, 0x0BCBC3C8, + 0x0ECEC2CC, 0x3C0C303C, 0x31417170, 0x11011110, 0x07C7C3C4, 0x09898188, + 0x35457174, 0x3BCBF3F8, 0x1ACAD2D8, 0x38C8F0F8, 0x14849094, 0x19495158, + 0x02828280, 0x04C4C0C4, 0x3FCFF3FC, 0x09494148, 0x39093138, 0x27476364, + 0x00C0C0C0, 0x0FCFC3CC, 0x17C7D3D4, 0x3888B0B8, 0x0F0F030C, 0x0E8E828C, + 0x02424240, 0x23032320, 0x11819190, 0x2C4C606C, 0x1BCBD3D8, 0x2484A0A4, + 0x34043034, 0x31C1F1F0, 0x08484048, 0x02C2C2C0, 0x2F4F636C, 0x3D0D313C, + 0x2D0D212C, 0x00404040, 0x3E8EB2BC, 0x3E0E323C, 0x3C8CB0BC, 0x01C1C1C0, + 0x2A8AA2A8, 0x3A8AB2B8, 0x0E4E424C, 0x15455154, 0x3B0B3338, 0x1CCCD0DC, + 0x28486068, 0x3F4F737C, 0x1C8C909C, 0x18C8D0D8, 0x0A4A4248, 0x16465254, + 0x37477374, 0x2080A0A0, 0x2DCDE1EC, 0x06464244, 0x3585B1B4, 0x2B0B2328, + 0x25456164, 0x3ACAF2F8, 0x23C3E3E0, 0x3989B1B8, 0x3181B1B0, 0x1F8F939C, + 0x1E4E525C, 0x39C9F1F8, 0x26C6E2E4, 0x3282B2B0, 0x31013130, 0x2ACAE2E8, + 0x2D4D616C, 0x1F4F535C, 0x24C4E0E4, 0x30C0F0F0, 0x0DCDC1CC, 0x08888088, + 0x16061214, 0x3A0A3238, 0x18485058, 0x14C4D0D4, 0x22426260, 0x29092128, + 0x07070304, 0x33033330, 0x28C8E0E8, 0x1B0B1318, 0x05050104, 0x39497178, + 0x10809090, 0x2A4A6268, 0x2A0A2228, 0x1A8A9298 }; + +alignas(64) const uint32_t SEED_S1[256] = { + 0x38380830, 0xE828C8E0, 0x2C2D0D21, 0xA42686A2, 0xCC0FCFC3, 0xDC1ECED2, + 0xB03383B3, 0xB83888B0, 0xAC2F8FA3, 0x60204060, 0x54154551, 0xC407C7C3, + 0x44044440, 0x6C2F4F63, 0x682B4B63, 0x581B4B53, 0xC003C3C3, 0x60224262, + 0x30330333, 0xB43585B1, 0x28290921, 0xA02080A0, 0xE022C2E2, 
0xA42787A3, + 0xD013C3D3, 0x90118191, 0x10110111, 0x04060602, 0x1C1C0C10, 0xBC3C8CB0, + 0x34360632, 0x480B4B43, 0xEC2FCFE3, 0x88088880, 0x6C2C4C60, 0xA82888A0, + 0x14170713, 0xC404C4C0, 0x14160612, 0xF434C4F0, 0xC002C2C2, 0x44054541, + 0xE021C1E1, 0xD416C6D2, 0x3C3F0F33, 0x3C3D0D31, 0x8C0E8E82, 0x98188890, + 0x28280820, 0x4C0E4E42, 0xF436C6F2, 0x3C3E0E32, 0xA42585A1, 0xF839C9F1, + 0x0C0D0D01, 0xDC1FCFD3, 0xD818C8D0, 0x282B0B23, 0x64264662, 0x783A4A72, + 0x24270723, 0x2C2F0F23, 0xF031C1F1, 0x70324272, 0x40024242, 0xD414C4D0, + 0x40014141, 0xC000C0C0, 0x70334373, 0x64274763, 0xAC2C8CA0, 0x880B8B83, + 0xF437C7F3, 0xAC2D8DA1, 0x80008080, 0x1C1F0F13, 0xC80ACAC2, 0x2C2C0C20, + 0xA82A8AA2, 0x34340430, 0xD012C2D2, 0x080B0B03, 0xEC2ECEE2, 0xE829C9E1, + 0x5C1D4D51, 0x94148490, 0x18180810, 0xF838C8F0, 0x54174753, 0xAC2E8EA2, + 0x08080800, 0xC405C5C1, 0x10130313, 0xCC0DCDC1, 0x84068682, 0xB83989B1, + 0xFC3FCFF3, 0x7C3D4D71, 0xC001C1C1, 0x30310131, 0xF435C5F1, 0x880A8A82, + 0x682A4A62, 0xB03181B1, 0xD011C1D1, 0x20200020, 0xD417C7D3, 0x00020202, + 0x20220222, 0x04040400, 0x68284860, 0x70314171, 0x04070703, 0xD81BCBD3, + 0x9C1D8D91, 0x98198991, 0x60214161, 0xBC3E8EB2, 0xE426C6E2, 0x58194951, + 0xDC1DCDD1, 0x50114151, 0x90108090, 0xDC1CCCD0, 0x981A8A92, 0xA02383A3, + 0xA82B8BA3, 0xD010C0D0, 0x80018181, 0x0C0F0F03, 0x44074743, 0x181A0A12, + 0xE023C3E3, 0xEC2CCCE0, 0x8C0D8D81, 0xBC3F8FB3, 0x94168692, 0x783B4B73, + 0x5C1C4C50, 0xA02282A2, 0xA02181A1, 0x60234363, 0x20230323, 0x4C0D4D41, + 0xC808C8C0, 0x9C1E8E92, 0x9C1C8C90, 0x383A0A32, 0x0C0C0C00, 0x2C2E0E22, + 0xB83A8AB2, 0x6C2E4E62, 0x9C1F8F93, 0x581A4A52, 0xF032C2F2, 0x90128292, + 0xF033C3F3, 0x48094941, 0x78384870, 0xCC0CCCC0, 0x14150511, 0xF83BCBF3, + 0x70304070, 0x74354571, 0x7C3F4F73, 0x34350531, 0x10100010, 0x00030303, + 0x64244460, 0x6C2D4D61, 0xC406C6C2, 0x74344470, 0xD415C5D1, 0xB43484B0, + 0xE82ACAE2, 0x08090901, 0x74364672, 0x18190911, 0xFC3ECEF2, 0x40004040, + 0x10120212, 0xE020C0E0, 0xBC3D8DB1, 0x04050501, 0xF83ACAF2, 
0x00010101, + 0xF030C0F0, 0x282A0A22, 0x5C1E4E52, 0xA82989A1, 0x54164652, 0x40034343, + 0x84058581, 0x14140410, 0x88098981, 0x981B8B93, 0xB03080B0, 0xE425C5E1, + 0x48084840, 0x78394971, 0x94178793, 0xFC3CCCF0, 0x1C1E0E12, 0x80028282, + 0x20210121, 0x8C0C8C80, 0x181B0B13, 0x5C1F4F53, 0x74374773, 0x54144450, + 0xB03282B2, 0x1C1D0D11, 0x24250521, 0x4C0F4F43, 0x00000000, 0x44064642, + 0xEC2DCDE1, 0x58184850, 0x50124252, 0xE82BCBE3, 0x7C3E4E72, 0xD81ACAD2, + 0xC809C9C1, 0xFC3DCDF1, 0x30300030, 0x94158591, 0x64254561, 0x3C3C0C30, + 0xB43686B2, 0xE424C4E0, 0xB83B8BB3, 0x7C3C4C70, 0x0C0E0E02, 0x50104050, + 0x38390931, 0x24260622, 0x30320232, 0x84048480, 0x68294961, 0x90138393, + 0x34370733, 0xE427C7E3, 0x24240420, 0xA42484A0, 0xC80BCBC3, 0x50134353, + 0x080A0A02, 0x84078783, 0xD819C9D1, 0x4C0C4C40, 0x80038383, 0x8C0F8F83, + 0xCC0ECEC2, 0x383B0B33, 0x480A4A42, 0xB43787B3 }; + +alignas(64) const uint32_t SEED_S2[256] = { + 0xA1A82989, 0x81840585, 0xD2D416C6, 0xD3D013C3, 0x50541444, 0x111C1D0D, + 0xA0AC2C8C, 0x21242505, 0x515C1D4D, 0x43400343, 0x10181808, 0x121C1E0E, + 0x51501141, 0xF0FC3CCC, 0xC2C80ACA, 0x63602343, 0x20282808, 0x40440444, + 0x20202000, 0x919C1D8D, 0xE0E020C0, 0xE2E022C2, 0xC0C808C8, 0x13141707, + 0xA1A42585, 0x838C0F8F, 0x03000303, 0x73783B4B, 0xB3B83B8B, 0x13101303, + 0xD2D012C2, 0xE2EC2ECE, 0x70703040, 0x808C0C8C, 0x333C3F0F, 0xA0A82888, + 0x32303202, 0xD1DC1DCD, 0xF2F436C6, 0x70743444, 0xE0EC2CCC, 0x91941585, + 0x03080B0B, 0x53541747, 0x505C1C4C, 0x53581B4B, 0xB1BC3D8D, 0x01000101, + 0x20242404, 0x101C1C0C, 0x73703343, 0x90981888, 0x10101000, 0xC0CC0CCC, + 0xF2F032C2, 0xD1D819C9, 0x202C2C0C, 0xE3E427C7, 0x72703242, 0x83800383, + 0x93981B8B, 0xD1D011C1, 0x82840686, 0xC1C809C9, 0x60602040, 0x50501040, + 0xA3A02383, 0xE3E82BCB, 0x010C0D0D, 0xB2B43686, 0x929C1E8E, 0x434C0F4F, + 0xB3B43787, 0x52581A4A, 0xC2C406C6, 0x70783848, 0xA2A42686, 0x12101202, + 0xA3AC2F8F, 0xD1D415C5, 0x61602141, 0xC3C003C3, 0xB0B43484, 0x41400141, + 0x52501242, 0x717C3D4D, 0x818C0D8D, 
0x00080808, 0x131C1F0F, 0x91981989, + 0x00000000, 0x11181909, 0x00040404, 0x53501343, 0xF3F437C7, 0xE1E021C1, + 0xF1FC3DCD, 0x72743646, 0x232C2F0F, 0x23242707, 0xB0B03080, 0x83880B8B, + 0x020C0E0E, 0xA3A82B8B, 0xA2A02282, 0x626C2E4E, 0x93901383, 0x414C0D4D, + 0x61682949, 0x707C3C4C, 0x01080909, 0x02080A0A, 0xB3BC3F8F, 0xE3EC2FCF, + 0xF3F033C3, 0xC1C405C5, 0x83840787, 0x10141404, 0xF2FC3ECE, 0x60642444, + 0xD2DC1ECE, 0x222C2E0E, 0x43480B4B, 0x12181A0A, 0x02040606, 0x21202101, + 0x63682B4B, 0x62642646, 0x02000202, 0xF1F435C5, 0x92901282, 0x82880A8A, + 0x000C0C0C, 0xB3B03383, 0x727C3E4E, 0xD0D010C0, 0x72783A4A, 0x43440747, + 0x92941686, 0xE1E425C5, 0x22242606, 0x80800080, 0xA1AC2D8D, 0xD3DC1FCF, + 0xA1A02181, 0x30303000, 0x33343707, 0xA2AC2E8E, 0x32343606, 0x11141505, + 0x22202202, 0x30383808, 0xF0F434C4, 0xA3A42787, 0x41440545, 0x404C0C4C, + 0x81800181, 0xE1E829C9, 0x80840484, 0x93941787, 0x31343505, 0xC3C80BCB, + 0xC2CC0ECE, 0x303C3C0C, 0x71703141, 0x11101101, 0xC3C407C7, 0x81880989, + 0x71743545, 0xF3F83BCB, 0xD2D81ACA, 0xF0F838C8, 0x90941484, 0x51581949, + 0x82800282, 0xC0C404C4, 0xF3FC3FCF, 0x41480949, 0x31383909, 0x63642747, + 0xC0C000C0, 0xC3CC0FCF, 0xD3D417C7, 0xB0B83888, 0x030C0F0F, 0x828C0E8E, + 0x42400242, 0x23202303, 0x91901181, 0x606C2C4C, 0xD3D81BCB, 0xA0A42484, + 0x30343404, 0xF1F031C1, 0x40480848, 0xC2C002C2, 0x636C2F4F, 0x313C3D0D, + 0x212C2D0D, 0x40400040, 0xB2BC3E8E, 0x323C3E0E, 0xB0BC3C8C, 0xC1C001C1, + 0xA2A82A8A, 0xB2B83A8A, 0x424C0E4E, 0x51541545, 0x33383B0B, 0xD0DC1CCC, + 0x60682848, 0x737C3F4F, 0x909C1C8C, 0xD0D818C8, 0x42480A4A, 0x52541646, + 0x73743747, 0xA0A02080, 0xE1EC2DCD, 0x42440646, 0xB1B43585, 0x23282B0B, + 0x61642545, 0xF2F83ACA, 0xE3E023C3, 0xB1B83989, 0xB1B03181, 0x939C1F8F, + 0x525C1E4E, 0xF1F839C9, 0xE2E426C6, 0xB2B03282, 0x31303101, 0xE2E82ACA, + 0x616C2D4D, 0x535C1F4F, 0xE0E424C4, 0xF0F030C0, 0xC1CC0DCD, 0x80880888, + 0x12141606, 0x32383A0A, 0x50581848, 0xD0D414C4, 0x62602242, 0x21282909, + 0x03040707, 0x33303303, 0xE0E828C8, 
0x13181B0B, 0x01040505, 0x71783949, + 0x90901080, 0x62682A4A, 0x22282A0A, 0x92981A8A }; + +alignas(64) const uint32_t SEED_S3[256] = { + 0x08303838, 0xC8E0E828, 0x0D212C2D, 0x86A2A426, 0xCFC3CC0F, 0xCED2DC1E, + 0x83B3B033, 0x88B0B838, 0x8FA3AC2F, 0x40606020, 0x45515415, 0xC7C3C407, + 0x44404404, 0x4F636C2F, 0x4B63682B, 0x4B53581B, 0xC3C3C003, 0x42626022, + 0x03333033, 0x85B1B435, 0x09212829, 0x80A0A020, 0xC2E2E022, 0x87A3A427, + 0xC3D3D013, 0x81919011, 0x01111011, 0x06020406, 0x0C101C1C, 0x8CB0BC3C, + 0x06323436, 0x4B43480B, 0xCFE3EC2F, 0x88808808, 0x4C606C2C, 0x88A0A828, + 0x07131417, 0xC4C0C404, 0x06121416, 0xC4F0F434, 0xC2C2C002, 0x45414405, + 0xC1E1E021, 0xC6D2D416, 0x0F333C3F, 0x0D313C3D, 0x8E828C0E, 0x88909818, + 0x08202828, 0x4E424C0E, 0xC6F2F436, 0x0E323C3E, 0x85A1A425, 0xC9F1F839, + 0x0D010C0D, 0xCFD3DC1F, 0xC8D0D818, 0x0B23282B, 0x46626426, 0x4A72783A, + 0x07232427, 0x0F232C2F, 0xC1F1F031, 0x42727032, 0x42424002, 0xC4D0D414, + 0x41414001, 0xC0C0C000, 0x43737033, 0x47636427, 0x8CA0AC2C, 0x8B83880B, + 0xC7F3F437, 0x8DA1AC2D, 0x80808000, 0x0F131C1F, 0xCAC2C80A, 0x0C202C2C, + 0x8AA2A82A, 0x04303434, 0xC2D2D012, 0x0B03080B, 0xCEE2EC2E, 0xC9E1E829, + 0x4D515C1D, 0x84909414, 0x08101818, 0xC8F0F838, 0x47535417, 0x8EA2AC2E, + 0x08000808, 0xC5C1C405, 0x03131013, 0xCDC1CC0D, 0x86828406, 0x89B1B839, + 0xCFF3FC3F, 0x4D717C3D, 0xC1C1C001, 0x01313031, 0xC5F1F435, 0x8A82880A, + 0x4A62682A, 0x81B1B031, 0xC1D1D011, 0x00202020, 0xC7D3D417, 0x02020002, + 0x02222022, 0x04000404, 0x48606828, 0x41717031, 0x07030407, 0xCBD3D81B, + 0x8D919C1D, 0x89919819, 0x41616021, 0x8EB2BC3E, 0xC6E2E426, 0x49515819, + 0xCDD1DC1D, 0x41515011, 0x80909010, 0xCCD0DC1C, 0x8A92981A, 0x83A3A023, + 0x8BA3A82B, 0xC0D0D010, 0x81818001, 0x0F030C0F, 0x47434407, 0x0A12181A, + 0xC3E3E023, 0xCCE0EC2C, 0x8D818C0D, 0x8FB3BC3F, 0x86929416, 0x4B73783B, + 0x4C505C1C, 0x82A2A022, 0x81A1A021, 0x43636023, 0x03232023, 0x4D414C0D, + 0xC8C0C808, 0x8E929C1E, 0x8C909C1C, 0x0A32383A, 0x0C000C0C, 0x0E222C2E, + 0x8AB2B83A, 
0x4E626C2E, 0x8F939C1F, 0x4A52581A, 0xC2F2F032, 0x82929012, + 0xC3F3F033, 0x49414809, 0x48707838, 0xCCC0CC0C, 0x05111415, 0xCBF3F83B, + 0x40707030, 0x45717435, 0x4F737C3F, 0x05313435, 0x00101010, 0x03030003, + 0x44606424, 0x4D616C2D, 0xC6C2C406, 0x44707434, 0xC5D1D415, 0x84B0B434, + 0xCAE2E82A, 0x09010809, 0x46727436, 0x09111819, 0xCEF2FC3E, 0x40404000, + 0x02121012, 0xC0E0E020, 0x8DB1BC3D, 0x05010405, 0xCAF2F83A, 0x01010001, + 0xC0F0F030, 0x0A22282A, 0x4E525C1E, 0x89A1A829, 0x46525416, 0x43434003, + 0x85818405, 0x04101414, 0x89818809, 0x8B93981B, 0x80B0B030, 0xC5E1E425, + 0x48404808, 0x49717839, 0x87939417, 0xCCF0FC3C, 0x0E121C1E, 0x82828002, + 0x01212021, 0x8C808C0C, 0x0B13181B, 0x4F535C1F, 0x47737437, 0x44505414, + 0x82B2B032, 0x0D111C1D, 0x05212425, 0x4F434C0F, 0x00000000, 0x46424406, + 0xCDE1EC2D, 0x48505818, 0x42525012, 0xCBE3E82B, 0x4E727C3E, 0xCAD2D81A, + 0xC9C1C809, 0xCDF1FC3D, 0x00303030, 0x85919415, 0x45616425, 0x0C303C3C, + 0x86B2B436, 0xC4E0E424, 0x8BB3B83B, 0x4C707C3C, 0x0E020C0E, 0x40505010, + 0x09313839, 0x06222426, 0x02323032, 0x84808404, 0x49616829, 0x83939013, + 0x07333437, 0xC7E3E427, 0x04202424, 0x84A0A424, 0xCBC3C80B, 0x43535013, + 0x0A02080A, 0x87838407, 0xC9D1D819, 0x4C404C0C, 0x83838003, 0x8F838C0F, + 0xCEC2CC0E, 0x0B33383B, 0x4A42480A, 0x87B3B437 }; + +/* +* SEED G Function +* +* XORs four table lookups, one per byte of X: get_byte(3, X) indexes +* SEED_S0, get_byte(2, X) SEED_S1, get_byte(1, X) SEED_S2 and +* get_byte(0, X) SEED_S3. +*/ +inline uint32_t SEED_G(uint32_t X) + { + return (SEED_S0[get_byte(3, X)] ^ SEED_S1[get_byte(2, X)] ^ + SEED_S2[get_byte(1, X)] ^ SEED_S3[get_byte(0, X)]); + } + +} + +/* +* SEED Encryption +* +* Each of the 8 loop passes performs two rounds: first (B2, B3) update +* (B0, B1) using m_K[2*j] and m_K[2*j+1], then (B0, B1) update (B2, B3) +* using m_K[2*j+2] and m_K[2*j+3]. The halves are swapped on output +* (B2, B3, B0, B1). +*/ +void SEED::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_K.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t B0 = load_be<uint32_t>(in, 0); + uint32_t B1 = load_be<uint32_t>(in, 1); + uint32_t B2 = load_be<uint32_t>(in, 2); + uint32_t B3 = load_be<uint32_t>(in, 3); + + for(size_t j = 0; j != 16; j += 2) + { + uint32_t T0, T1; + + T0 = B2 ^ m_K[2*j]; + T1 = SEED_G(B2 ^ B3 ^ m_K[2*j+1]); + T0 = SEED_G(T1 + T0);
+ T1 = SEED_G(T1 + T0); + B1 ^= T1; + B0 ^= T0 + T1; + + T0 = B0 ^ m_K[2*j+2]; + T1 = SEED_G(B0 ^ B1 ^ m_K[2*j+3]); + T0 = SEED_G(T1 + T0); + T1 = SEED_G(T1 + T0); + B3 ^= T1; + B2 ^= T0 + T1; + } + + store_be(out, B2, B3, B0, B1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SEED Decryption +* +* Identical round structure to encrypt_n; only the subkey order differs. +* The pairs of m_K are consumed in reverse (m_K[30], m_K[31] first and +* m_K[0], m_K[1] last). +*/ +void SEED::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_K.empty() == false); + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t B0 = load_be<uint32_t>(in, 0); + uint32_t B1 = load_be<uint32_t>(in, 1); + uint32_t B2 = load_be<uint32_t>(in, 2); + uint32_t B3 = load_be<uint32_t>(in, 3); + + for(size_t j = 0; j != 16; j += 2) + { + uint32_t T0, T1; + + T0 = B2 ^ m_K[30-2*j]; + T1 = SEED_G(B2 ^ B3 ^ m_K[31-2*j]); + T0 = SEED_G(T1 + T0); + T1 = SEED_G(T1 + T0); + B1 ^= T1; + B0 ^= T0 + T1; + + T0 = B0 ^ m_K[28-2*j]; + T1 = SEED_G(B0 ^ B1 ^ m_K[29-2*j]); + T0 = SEED_G(T1 + T0); + T1 = SEED_G(T1 + T0); + B3 ^= T1; + B2 ^= T0 + T1; + } + + store_be(out, B2, B3, B0, B1); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SEED Key Schedule +* +* Fills the 32-entry m_K from the four big-endian key words held in WK. +* Each of the 8 loop passes emits two subkey pairs. After the first pair, +* WK[0] and WK[1] are shifted right by 8 bits with a byte carried from +* each into the top of the other; after the second pair, WK[2] and WK[3] +* are shifted left by 8 bits with a byte carried into the bottom of each. +* The length argument is not used. +*/ +void SEED::key_schedule(const uint8_t key[], size_t) + { + const uint32_t RC[16] = { + 0x9E3779B9, 0x3C6EF373, 0x78DDE6E6, 0xF1BBCDCC, + 0xE3779B99, 0xC6EF3733, 0x8DDE6E67, 0x1BBCDCCF, + 0x3779B99E, 0x6EF3733C, 0xDDE6E678, 0xBBCDCCF1, + 0x779B99E3, 0xEF3733C6, 0xDE6E678D, 0xBCDCCF1B + }; + + secure_vector<uint32_t> WK(4); + + for(size_t i = 0; i != 4; ++i) + WK[i] = load_be<uint32_t>(key, i); + + m_K.resize(32); + + for(size_t i = 0; i != 16; i += 2) + { + m_K[2*i ] = SEED_G(WK[0] + WK[2] - RC[i]); + m_K[2*i+1] = SEED_G(WK[1] - WK[3] + RC[i]) ^ m_K[2*i]; + + uint32_t T = (WK[0] & 0xFF) << 24; + WK[0] = (WK[0] >> 8) | (get_byte(3, WK[1]) << 24); + WK[1] = (WK[1] >> 8) | T; + + m_K[2*i+2] = SEED_G(WK[0] + WK[2] - RC[i+1]); + m_K[2*i+3] = SEED_G(WK[1] - WK[3] + RC[i+1]) ^ m_K[2*i+2]; + + T = get_byte(0, WK[3]); + WK[3] = (WK[3] << 8) | get_byte(0, WK[2]); + WK[2] = (WK[2] <<
8) | T; + } + } + /* Applies zap() to the subkey vector m_K (presumably wiping it and leaving it empty, so verify_key_set fails until a new key is set; confirm against zap's definition) */ +void SEED::clear() + { + zap(m_K); + } + +} diff --git a/comm/third_party/botan/src/lib/block/seed/seed.h b/comm/third_party/botan/src/lib/block/seed/seed.h new file mode 100644 index 0000000000..a616243135 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/seed/seed.h @@ -0,0 +1,37 @@ +/* +* SEED +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SEED_H_ +#define BOTAN_SEED_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(seed.h) + +namespace Botan { + +/** +* SEED, a Korean block cipher +* +* A single subkey vector, m_K, is used by both encrypt_n and decrypt_n. +*/ +class BOTAN_PUBLIC_API(2,0) SEED final : public Block_Cipher_Fixed_Params<16, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "SEED"; } + BlockCipher* clone() const override { return new SEED; } + private: + void key_schedule(const uint8_t[], size_t) override; + + secure_vector<uint32_t> m_K; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/serpent/info.txt b/comm/third_party/botan/src/lib/block/serpent/info.txt new file mode 100644 index 0000000000..89b860ce4f --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/info.txt @@ -0,0 +1,11 @@ +<defines> +SERPENT -> 20131128 +</defines> + +<header:public> +serpent.h +</header:public> + +<header:internal> +serpent_sbox.h +</header:internal> diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent.cpp new file mode 100644 index 0000000000..ff37a177c7 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent.cpp @@ -0,0 +1,299 @@ +/* +* Serpent +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/serpent.h> +#include
<botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/internal/serpent_sbox.h> + +#if defined(BOTAN_HAS_SERPENT_SIMD) || defined(BOTAN_HAS_SERPENT_AVX2) + #include <botan/cpuid.h> +#endif + +namespace Botan { + +namespace { + +/* +* Serpent's Linear Transform +*/ +inline void transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) + { + B0 = rotl<13>(B0); B2 = rotl<3>(B2); + B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); + B1 = rotl<1>(B1); B3 = rotl<7>(B3); + B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); + B0 = rotl<5>(B0); B2 = rotl<22>(B2); + } + +/* +* Serpent's Inverse Linear Transform +*/ +inline void i_transform(uint32_t& B0, uint32_t& B1, uint32_t& B2, uint32_t& B3) + { + B2 = rotr<22>(B2); B0 = rotr<5>(B0); + B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; + B3 = rotr<7>(B3); B1 = rotr<1>(B1); + B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2; + B2 = rotr<3>(B2); B0 = rotr<13>(B0); + } + +} + +/* +* XOR a key block with a data block +*/ +#define key_xor(round, B0, B1, B2, B3) \ + B0 ^= m_round_key[4*round ]; \ + B1 ^= m_round_key[4*round+1]; \ + B2 ^= m_round_key[4*round+2]; \ + B3 ^= m_round_key[4*round+3]; + +/* +* Serpent Encryption +*/ +void Serpent::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + +#if defined(BOTAN_HAS_SERPENT_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_encrypt_8(in, out); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SERPENT_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_encrypt_4(in, out); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) + { + uint32_t B0, B1, B2, B3; + load_le(in + 16*i, B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); 
transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); 
SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + store_le(out + 16*i, B0, B1, B2, B3); + } + } + +/* +* Serpent Decryption +*/ +void Serpent::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_round_key.empty() == false); + +#if defined(BOTAN_HAS_SERPENT_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_decrypt_8(in, out); + in += 8 * BLOCK_SIZE; + out += 8 * BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SERPENT_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_decrypt_4(in, out); + in += 4 * BLOCK_SIZE; + out += 4 * BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) + { + uint32_t B0, B1, B2, B3; + load_le(in + 16*i, B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + 
i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + store_le(out + 16*i, B0, B1, B2, B3); + } + } + +#undef key_xor +#undef transform +#undef i_transform + +/* +* Serpent Key Schedule +*/ +void Serpent::key_schedule(const uint8_t key[], size_t length) + { + const uint32_t PHI = 0x9E3779B9; + + secure_vector<uint32_t> W(140); + for(size_t i = 0; i != length / 4; ++i) + W[i] = load_le<uint32_t>(key, i); + + W[length / 4] |= uint32_t(1) << ((length%4)*8); + + for(size_t i = 8; i != 140; ++i) + { + uint32_t wi = W[i-8] ^ W[i-5] ^ W[i-3] ^ W[i-1] ^ PHI ^ uint32_t(i-8); + W[i] = rotl<11>(wi); + } + + SBoxE0(W[ 20],W[ 21],W[ 22],W[ 23]); + SBoxE0(W[ 52],W[ 53],W[ 54],W[ 55]); + SBoxE0(W[ 84],W[ 85],W[ 86],W[ 87]); + SBoxE0(W[116],W[117],W[118],W[119]); + + SBoxE1(W[ 16],W[ 17],W[ 18],W[ 
19]); + SBoxE1(W[ 48],W[ 49],W[ 50],W[ 51]); + SBoxE1(W[ 80],W[ 81],W[ 82],W[ 83]); + SBoxE1(W[112],W[113],W[114],W[115]); + + SBoxE2(W[ 12],W[ 13],W[ 14],W[ 15]); + SBoxE2(W[ 44],W[ 45],W[ 46],W[ 47]); + SBoxE2(W[ 76],W[ 77],W[ 78],W[ 79]); + SBoxE2(W[108],W[109],W[110],W[111]); + + SBoxE3(W[ 8],W[ 9],W[ 10],W[ 11]); + SBoxE3(W[ 40],W[ 41],W[ 42],W[ 43]); + SBoxE3(W[ 72],W[ 73],W[ 74],W[ 75]); + SBoxE3(W[104],W[105],W[106],W[107]); + SBoxE3(W[136],W[137],W[138],W[139]); + + SBoxE4(W[ 36],W[ 37],W[ 38],W[ 39]); + SBoxE4(W[ 68],W[ 69],W[ 70],W[ 71]); + SBoxE4(W[100],W[101],W[102],W[103]); + SBoxE4(W[132],W[133],W[134],W[135]); + + SBoxE5(W[ 32],W[ 33],W[ 34],W[ 35]); + SBoxE5(W[ 64],W[ 65],W[ 66],W[ 67]); + SBoxE5(W[ 96],W[ 97],W[ 98],W[ 99]); + SBoxE5(W[128],W[129],W[130],W[131]); + + SBoxE6(W[ 28],W[ 29],W[ 30],W[ 31]); + SBoxE6(W[ 60],W[ 61],W[ 62],W[ 63]); + SBoxE6(W[ 92],W[ 93],W[ 94],W[ 95]); + SBoxE6(W[124],W[125],W[126],W[127]); + + SBoxE7(W[ 24],W[ 25],W[ 26],W[ 27]); + SBoxE7(W[ 56],W[ 57],W[ 58],W[ 59]); + SBoxE7(W[ 88],W[ 89],W[ 90],W[ 91]); + SBoxE7(W[120],W[121],W[122],W[123]); + + m_round_key.assign(W.begin() + 8, W.end()); + } + +void Serpent::clear() + { + zap(m_round_key); + } + +std::string Serpent::provider() const + { +#if defined(BOTAN_HAS_SERPENT_AVX2) + if(CPUID::has_avx2()) + { + return "avx2"; + } +#endif + +#if defined(BOTAN_HAS_SERPENT_SIMD) + if(CPUID::has_simd_32()) + { + return "simd"; + } +#endif + + return "base"; + } + +#undef key_xor + +} diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent.h b/comm/third_party/botan/src/lib/block/serpent/serpent.h new file mode 100644 index 0000000000..64eb8a8b04 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent.h @@ -0,0 +1,53 @@ +/* +* Serpent +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SERPENT_H_ +#define BOTAN_SERPENT_H_ + +#include <botan/block_cipher.h> + 
+BOTAN_FUTURE_INTERNAL_HEADER(serpent.h) + +namespace Botan { + +/** +* Serpent is the most conservative of the AES finalists; encrypt_n/decrypt_n hand groups of 8 or 4 blocks to the AVX2/SIMD helpers declared below when those are compiled in and CPUID reports support, and process the remaining blocks one at a time +* https://www.cl.cam.ac.uk/~rja14/serpent.html +*/ +class BOTAN_PUBLIC_API(2,0) Serpent final : public Block_Cipher_Fixed_Params<16, 16, 32, 8> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string provider() const override; + std::string name() const override { return "Serpent"; } + BlockCipher* clone() const override { return new Serpent; } + + size_t parallelism() const override { return 4; } + + private: + /* The array bounds below are documentation only (the parameters are pointers); each states the bytes one call reads and writes */ +#if defined(BOTAN_HAS_SERPENT_SIMD) + void simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const; /* 4 blocks of 16 bytes */ + void simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const; +#endif + +#if defined(BOTAN_HAS_SERPENT_AVX2) + void avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const; /* 8 blocks of 16 bytes, matching the definitions in serpent_avx2.cpp */ + void avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const; +#endif + + void key_schedule(const uint8_t key[], size_t length) override; + + secure_vector<uint32_t> m_round_key; /* 132 words: four per key_xor round 0..32, filled by key_schedule */ + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt new file mode 100644 index 0000000000..b0fbfb334e --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/info.txt @@ -0,0 +1,17 @@ +<defines> +SERPENT_AVX2 -> 20180824 +</defines> + +<isa> +avx2 +</isa> + +<requires> +simd_avx2 +</requires> + +# We must exclude MSVC due to #2120 +<cc> +gcc +clang +</cc> diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp new file mode 100644 index 0000000000..0db332035d --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_avx2/serpent_avx2.cpp @@ -0,0
+1,169 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/serpent.h> +#include <botan/internal/serpent_sbox.h> +#include <botan/internal/simd_avx2.h> + +namespace Botan { + + +#define key_xor(round, B0, B1, B2, B3) \ + do { \ + B0 ^= SIMD_8x32::splat(m_round_key[4*round ]); \ + B1 ^= SIMD_8x32::splat(m_round_key[4*round+1]); \ + B2 ^= SIMD_8x32::splat(m_round_key[4*round+2]); \ + B3 ^= SIMD_8x32::splat(m_round_key[4*round+3]); \ + } while(0) + +/* +* Serpent's linear transformations +*/ +#define transform(B0, B1, B2, B3) \ + do { \ + B0 = B0.rotl<13>(); \ + B2 = B2.rotl<3>(); \ + B1 ^= B0 ^ B2; \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 = B1.rotl<1>(); \ + B3 = B3.rotl<7>(); \ + B0 ^= B1 ^ B3; \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 = B0.rotl<5>(); \ + B2 = B2.rotl<22>(); \ + } while(0) + +#define i_transform(B0, B1, B2, B3) \ + do { \ + B2 = B2.rotr<22>(); \ + B0 = B0.rotr<5>(); \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 ^= B1 ^ B3; \ + B3 = B3.rotr<7>(); \ + B1 = B1.rotr<1>(); \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 ^= B0 ^ B2; \ + B2 = B2.rotr<3>(); \ + B0 = B0.rotr<13>(); \ + } while(0) + +BOTAN_FUNC_ISA("avx2") +void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 B0 = SIMD_8x32::load_le(in); + SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32); + SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64); + SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96); + + SIMD_8x32::transpose(B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); 
SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + SIMD_8x32::transpose(B0, B1, B2, B3); + B0.store_le(out); + B1.store_le(out + 32); + B2.store_le(out + 64); + B3.store_le(out + 96); + + 
SIMD_8x32::zero_registers(); + } + +BOTAN_FUNC_ISA("avx2") +void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 B0 = SIMD_8x32::load_le(in); + SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32); + SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64); + SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96); + + SIMD_8x32::transpose(B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); 
SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + SIMD_8x32::transpose(B0, B1, B2, B3); + + B0.store_le(out); + B1.store_le(out + 32); + B2.store_le(out + 64); + B3.store_le(out + 96); + + SIMD_8x32::zero_registers(); + } + +#undef key_xor +#undef transform +#undef i_transform + +} diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h b/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h new file mode 100644 index 0000000000..31471e7247 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_sbox.h @@ -0,0 +1,446 @@ +/* +* Serpent SBox Expressions +* (C) 1999-2007,2013 Jack Lloyd +* +* The sbox expressions used here were discovered by Dag Arne Osvik and +* are described in his paper "Speeding Up Serpent". 
+* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SERPENT_SBOX_H_ +#define BOTAN_SERPENT_SBOX_H_ + +#include <botan/build.h> + +template<typename T>/* Serpent S-box 0 (encryption): rewrites a,b,c,d in place with &, |, ^, ~ and one temporary; T is uint32_t, SIMD_4x32 or SIMD_8x32 at the call sites in this patch. Applied after key_xor in rounds 0, 8, 16, 24 */ +BOTAN_FORCE_INLINE void SBoxE0(T& a, T& b, T& c, T& d) + { + d ^= a; + T t0 = b; + b &= d; + t0 ^= c; + b ^= a; + a |= d; + a ^= t0; + t0 ^= d; + d ^= c; + c |= b; + c ^= t0; + t0 = ~t0; + t0 |= b; + b ^= d; + b ^= t0; + d |= a; + b ^= d; + t0 ^= d; + d = a; + a = b; + b = t0; + } + +template<typename T>/* Serpent S-box 1 (encryption): applied after key_xor in rounds 1, 9, 17, 25 */ +BOTAN_FORCE_INLINE void SBoxE1(T& a, T& b, T& c, T& d) + { + a = ~a; + c = ~c; + T t0 = a; + a &= b; + c ^= a; + a |= d; + d ^= c; + b ^= a; + a ^= t0; + t0 |= b; + b ^= d; + c |= a; + c &= t0; + a ^= b; + b &= c; + b ^= a; + a &= c; + t0 ^= a; + a = c; + c = d; + d = b; + b = t0; + } + +template<typename T>/* Serpent S-box 2 (encryption): applied after key_xor in rounds 2, 10, 18, 26 */ +BOTAN_FORCE_INLINE void SBoxE2(T& a, T& b, T& c, T& d) + { + T t0 = a; + a &= c; + a ^= d; + c ^= b; + c ^= a; + d |= t0; + d ^= b; + t0 ^= c; + b = d; + d |= t0; + d ^= a; + a &= b; + t0 ^= a; + b ^= d; + b ^= t0; + a = c; + c = b; + b = d; + d = ~t0; + } + +template<typename T>/* Serpent S-box 3 (encryption): applied after key_xor in rounds 3, 11, 19, 27 */ +BOTAN_FORCE_INLINE void SBoxE3(T& a, T& b, T& c, T& d) + { + T t0 = a; + a |= d; + d ^= b; + b &= t0; + t0 ^= c; + c ^= d; + d &= a; + t0 |= b; + d ^= t0; + a ^= b; + t0 &= a; + b ^= d; + t0 ^= c; + b |= a; + b ^= c; + a ^= d; + c = b; + b |= d; + a ^= b; + b = c; + c = d; + d = t0; + } + +template<typename T>/* Serpent S-box 4 (encryption): applied after key_xor in rounds 4, 12, 20, 28 */ +BOTAN_FORCE_INLINE void SBoxE4(T& a, T& b, T& c, T& d) + { + b ^= d; + d = ~d; + c ^= d; + d ^= a; + T t0 = b; + b &= d; + b ^= c; + t0 ^= d; + a ^= t0; + c &= t0; + c ^= a; + a &= b; + d ^= a; + t0 |= b; + t0 ^= a; + a |= d; + a ^= c; + c &= d; + a = ~a; + t0 ^= c; + c = a; + a = b; + b = t0; + } + +template<typename T>/* Serpent S-box 5 (encryption): applied after key_xor in rounds 5, 13, 21, 29 */ +BOTAN_FORCE_INLINE void SBoxE5(T& a, T& b, T& c, T& d) + { + a ^= b; + b ^= d; + d = ~d; + T t0 = b; + b &= a; + c ^= d; + b ^= c; + c |= t0; + t0 ^= d; + d &= b; + d ^= a; + t0 ^= b; + t0 ^= c; + c ^= a; + a &= d; + c = ~c; + a ^= t0; + t0 |= d; + t0 ^= c; + c = a;
+ a = b; + b = d; + d = t0; + } + +template<typename T>/* Serpent S-box 6 (encryption): applied after key_xor in rounds 6, 14, 22, 30 */ +BOTAN_FORCE_INLINE void SBoxE6(T& a, T& b, T& c, T& d) + { + c = ~c; + T t0 = d; + d &= a; + a ^= t0; + d ^= c; + c |= t0; + b ^= d; + c ^= a; + a |= b; + c ^= b; + t0 ^= a; + a |= d; + a ^= c; + t0 ^= d; + t0 ^= a; + d = ~d; + c &= t0; + d ^= c; + c = t0; + } + +template<typename T>/* Serpent S-box 7 (encryption): applied after key_xor in rounds 7, 15, 23, 31 */ +BOTAN_FORCE_INLINE void SBoxE7(T& a, T& b, T& c, T& d) + { + T t0 = b; + b |= c; + b ^= d; + t0 ^= c; + c ^= b; + d |= t0; + d &= a; + t0 ^= c; + d ^= b; + b |= t0; + b ^= a; + a |= t0; + a ^= c; + b ^= t0; + c ^= b; + b &= a; + b ^= t0; + c = ~c; + c |= a; + t0 ^= c; + c = b; + b = d; + d = a; + a = t0; + } + +template<typename T>/* Serpent S-box 0 (decryption): applied before key_xor in rounds 24, 16, 8, 0 of the decrypt routines */ +BOTAN_FORCE_INLINE void SBoxD0(T& a, T& b, T& c, T& d) + { + c = ~c; + T t0 = b; + b |= a; + t0 = ~t0; + b ^= c; + c |= t0; + b ^= d; + a ^= t0; + c ^= a; + a &= d; + t0 ^= a; + a |= b; + a ^= c; + d ^= t0; + c ^= b; + d ^= a; + d ^= b; + c &= d; + t0 ^= c; + c = b; + b = t0; + } + +template<typename T>/* Serpent S-box 1 (decryption): applied before key_xor in rounds 25, 17, 9, 1 */ +BOTAN_FORCE_INLINE void SBoxD1(T& a, T& b, T& c, T& d) + { + T t0 = b; + b ^= d; + d &= b; + t0 ^= c; + d ^= a; + a |= b; + c ^= d; + a ^= t0; + a |= c; + b ^= d; + a ^= b; + b |= d; + b ^= a; + t0 = ~t0; + t0 ^= b; + b |= a; + b ^= a; + b |= t0; + d ^= b; + b = a; + a = t0; + t0 = c; + c = d; + d = t0; + } + +template<typename T>/* Serpent S-box 2 (decryption): applied before key_xor in rounds 26, 18, 10, 2 */ +BOTAN_FORCE_INLINE void SBoxD2(T& a, T& b, T& c, T& d) + { + c ^= d; + d ^= a; + T t0 = d; + d &= c; + d ^= b; + b |= c; + b ^= t0; + t0 &= d; + c ^= d; + t0 &= a; + t0 ^= c; + c &= b; + c |= a; + d = ~d; + c ^= d; + a ^= d; + a &= b; + d ^= t0; + d ^= a; + a = b; + b = t0; + } + +template<typename T>/* Serpent S-box 3 (decryption): applied before key_xor in rounds 27, 19, 11, 3 */ +BOTAN_FORCE_INLINE void SBoxD3(T& a, T& b, T& c, T& d) + { + T t0 = c; + c ^= b; + a ^= c; + t0 &= c; + t0 ^= a; + a &= b; + b ^= d; + d |= t0; + c ^= d; + a ^= d; + b ^= t0; + d &= c; + d ^= b; + b ^= a; + b |= c; + a ^= d; + b ^= t0; + a ^= b; + t0 = a; + a = c; + c = d; + d = t0; + } + +template<typename T>/* Serpent S-box 4 (decryption): applied before key_xor in rounds 28, 20, 12, 4 */ +BOTAN_FORCE_INLINE void SBoxD4(T& a, T& b, T& c, T& d) + { + T
t0 = c; + c &= d; + c ^= b; + b |= d; + b &= a; + t0 ^= c; + t0 ^= b; + b &= c; + a = ~a; + d ^= t0; + b ^= d; + d &= a; + d ^= c; + a ^= b; + c &= a; + d ^= a; + c ^= t0; + c |= d; + d ^= a; + c ^= b; + b = d; + d = t0; + } + +template<typename T>/* Serpent S-box 5 (decryption): applied before key_xor in rounds 29, 21, 13, 5 */ +BOTAN_FORCE_INLINE void SBoxD5(T& a, T& b, T& c, T& d) + { + b = ~b; + T t0 = d; + c ^= b; + d |= a; + d ^= c; + c |= b; + c &= a; + t0 ^= d; + c ^= t0; + t0 |= a; + t0 ^= b; + b &= c; + b ^= d; + t0 ^= c; + d &= t0; + t0 ^= b; + d ^= t0; + t0 = ~t0; + d ^= a; + a = b; + b = t0; + t0 = d; + d = c; + c = t0; + } + +template<typename T>/* Serpent S-box 6 (decryption): applied before key_xor in rounds 30, 22, 14, 6 */ +BOTAN_FORCE_INLINE void SBoxD6(T& a, T& b, T& c, T& d) + { + a ^= c; + T t0 = c; + c &= a; + t0 ^= d; + c = ~c; + d ^= b; + c ^= d; + t0 |= a; + a ^= c; + d ^= t0; + t0 ^= b; + b &= d; + b ^= a; + a ^= d; + a |= c; + d ^= b; + t0 ^= a; + a = b; + b = c; + c = t0; + } + +template<typename T>/* Serpent S-box 7 (decryption): applied between key_xor(32) and key_xor(31), then before key_xor in rounds 23, 15, 7 */ +BOTAN_FORCE_INLINE void SBoxD7(T& a, T& b, T& c, T& d) + { + T t0 = c; + c ^= a; + a &= d; + t0 |= d; + c = ~c; + d ^= b; + b |= a; + a ^= c; + c &= t0; + d &= t0; + b ^= c; + c ^= a; + a |= c; + t0 ^= b; + a ^= d; + d ^= t0; + t0 |= a; + d ^= c; + t0 ^= c; + c = b; + b = a; + a = d; + d = t0; + } + +#endif diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt new file mode 100644 index 0000000000..f7dadf33fc --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/info.txt @@ -0,0 +1,7 @@ +<defines> +SERPENT_SIMD -> 20160903 +</defines> + +<requires> +simd +</requires> diff --git a/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp new file mode 100644 index 0000000000..8ac783ba5c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/serpent/serpent_simd/serpent_simd.cpp @@ -0,0 +1,169 @@ +/* +* Serpent (SIMD) +* (C) 2009,2013 Jack Lloyd +* +* Botan is released under the Simplified
BSD License (see license.txt) +*/ + +#include <botan/serpent.h> +#include <botan/internal/serpent_sbox.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +#define key_xor(round, B0, B1, B2, B3) \ + do { \ + B0 ^= SIMD_4x32::splat(m_round_key[4*round ]); \ + B1 ^= SIMD_4x32::splat(m_round_key[4*round+1]); \ + B2 ^= SIMD_4x32::splat(m_round_key[4*round+2]); \ + B3 ^= SIMD_4x32::splat(m_round_key[4*round+3]); \ + } while(0) + +/* +* Serpent's linear transformations +*/ +#define transform(B0, B1, B2, B3) \ + do { \ + B0 = B0.rotl<13>(); \ + B2 = B2.rotl<3>(); \ + B1 ^= B0 ^ B2; \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 = B1.rotl<1>(); \ + B3 = B3.rotl<7>(); \ + B0 ^= B1 ^ B3; \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 = B0.rotl<5>(); \ + B2 = B2.rotl<22>(); \ + } while(0) + +#define i_transform(B0, B1, B2, B3) \ + do { \ + B2 = B2.rotr<22>(); \ + B0 = B0.rotr<5>(); \ + B2 ^= B3 ^ B1.shl<7>(); \ + B0 ^= B1 ^ B3; \ + B3 = B3.rotr<7>(); \ + B1 = B1.rotr<1>(); \ + B3 ^= B2 ^ B0.shl<3>(); \ + B1 ^= B0 ^ B2; \ + B2 = B2.rotr<3>(); \ + B0 = B0.rotr<13>(); \ + } while(0) + +/* +* SIMD Serpent Encryption of 4 blocks in parallel +*/ +void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const + { + SIMD_4x32 B0 = SIMD_4x32::load_le(in); + SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); + SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); + SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); 
transform(B0,B1,B2,B3); + + key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + + key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3); + + key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3); + key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + B0.store_le(out); + B1.store_le(out + 16); + B2.store_le(out + 32); + B3.store_le(out + 48); + } + +/* +* SIMD Serpent Decryption of 4 blocks in parallel +*/ +void Serpent::simd_decrypt_4(const 
uint8_t in[64], uint8_t out[64]) const + { + SIMD_4x32 B0 = SIMD_4x32::load_le(in); + SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16); + SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32); + SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); 
SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3); + + i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3); + i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3); + + SIMD_4x32::transpose(B0, B1, B2, B3); + + B0.store_le(out); + B1.store_le(out + 16); + B2.store_le(out + 32); + B3.store_le(out + 48); + } + +#undef key_xor +#undef transform +#undef i_transform + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/info.txt b/comm/third_party/botan/src/lib/block/shacal2/info.txt new file mode 100644 index 0000000000..62e00503f9 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/info.txt @@ -0,0 +1,5 @@ +<defines> +SHACAL2 -> 20170813 +</defines> + + diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2.cpp new file mode 100644 index 0000000000..b0c57f2359 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2.cpp @@ -0,0 +1,280 @@ +/* +* SHACAL-2 +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/cpuid.h> + +namespace Botan { + +namespace { + +inline void SHACAL2_Fwd(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, + uint32_t E, uint32_t F, uint32_t G, uint32_t& H, + uint32_t RK) + { + const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); + const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); + + H += E_rho + ((E & F) ^ 
(~E & G)) + RK; + D += H; + H += A_rho + ((A & B) | ((A | B) & C)); + } + +inline void SHACAL2_Rev(uint32_t A, uint32_t B, uint32_t C, uint32_t& D, + uint32_t E, uint32_t F, uint32_t G, uint32_t& H, + uint32_t RK) + { + const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A); + const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E); + + H -= A_rho + ((A & B) | ((A | B) & C)); + D -= H; + H -= E_rho + ((E & F) ^ (~E & G)) + RK; + } + +} + +/* +* SHACAL2 Encryption +*/ +void SHACAL2::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return x86_encrypt_blocks(in, out, blocks); + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_encrypt_8(in, out); + in += 8*BLOCK_SIZE; + out += 8*BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_encrypt_4(in, out); + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + uint32_t E = load_be<uint32_t>(in, 4); + uint32_t F = load_be<uint32_t>(in, 5); + uint32_t G = load_be<uint32_t>(in, 6); + uint32_t H = load_be<uint32_t>(in, 7); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + store_be(out, A, B, C, D, E, F, G, H); + + 
in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SHACAL2 Decryption +*/ +void SHACAL2::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + while(blocks >= 8) + { + avx2_decrypt_8(in, out); + in += 8*BLOCK_SIZE; + out += 8*BLOCK_SIZE; + blocks -= 8; + } + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + while(blocks >= 4) + { + simd_decrypt_4(in, out); + in += 4*BLOCK_SIZE; + out += 4*BLOCK_SIZE; + blocks -= 4; + } + } +#endif + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t A = load_be<uint32_t>(in, 0); + uint32_t B = load_be<uint32_t>(in, 1); + uint32_t C = load_be<uint32_t>(in, 2); + uint32_t D = load_be<uint32_t>(in, 3); + uint32_t E = load_be<uint32_t>(in, 4); + uint32_t F = load_be<uint32_t>(in, 5); + uint32_t G = load_be<uint32_t>(in, 6); + uint32_t H = load_be<uint32_t>(in, 7); + + for(size_t r = 0; r != 64; r += 8) + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + store_be(out, A, B, C, D, E, F, G, H); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SHACAL2 Key Schedule +*/ +void SHACAL2::key_schedule(const uint8_t key[], size_t len) + { + const uint32_t RC[64] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, + 
0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 + }; + + if(m_RK.empty()) + m_RK.resize(64); + else + clear_mem(m_RK.data(), m_RK.size()); + + load_be(m_RK.data(), key, len/4); + + for(size_t i = 16; i != 64; ++i) + { + const uint32_t sigma0_15 = rotr< 7>(m_RK[i-15]) ^ rotr<18>(m_RK[i-15]) ^ (m_RK[i-15] >> 3); + const uint32_t sigma1_2 = rotr<17>(m_RK[i- 2]) ^ rotr<19>(m_RK[i- 2]) ^ (m_RK[i- 2] >> 10); + m_RK[i] = m_RK[i-16] + sigma0_15 + m_RK[i-7] + sigma1_2; + } + + for(size_t i = 0; i != 64; ++i) + { + m_RK[i] += RC[i]; + } + } + +size_t SHACAL2::parallelism() const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return 4; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + return 8; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + return 4; + } +#endif + + return 1; + } + +std::string SHACAL2::provider() const + { +#if defined(BOTAN_HAS_SHACAL2_X86) + if(CPUID::has_intel_sha()) + { + return "intel_sha"; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + if(CPUID::has_avx2()) + { + return "avx2"; + } +#endif + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + if(CPUID::has_simd_32()) + { + return "simd"; + } +#endif + + return "base"; + } + +/* +* Clear memory of sensitive data +*/ +void SHACAL2::clear() + { + zap(m_RK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2.h b/comm/third_party/botan/src/lib/block/shacal2/shacal2.h new file mode 100644 index 0000000000..b752a03390 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2.h @@ -0,0 +1,54 
@@ +/* +* SHACAL-2 +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SHACAL2_H_ +#define BOTAN_SHACAL2_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(shacal2.h) + +namespace Botan { + +/** +* SHACAL2 block cipher. The <32, 16, 64, 4> template arguments are presumably block size, minimum and maximum key length, and key length step, all in bytes (TODO confirm against Block_Cipher_Fixed_Params). The SIMD, AVX2 and x86 helpers below are declared only when the matching BOTAN_HAS_SHACAL2_* macro is defined. +*/ +class BOTAN_PUBLIC_API(2,3) SHACAL2 final : public Block_Cipher_Fixed_Params<32, 16, 64, 4> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + std::string provider() const override; + void clear() override; + std::string name() const override { return "SHACAL2"; } + BlockCipher* clone() const override { return new SHACAL2; } + size_t parallelism() const override; + + private: + void key_schedule(const uint8_t[], size_t) override; + +#if defined(BOTAN_HAS_SHACAL2_SIMD) + void simd_encrypt_4(const uint8_t in[], uint8_t out[]) const; + void simd_decrypt_4(const uint8_t in[], uint8_t out[]) const; +#endif + +#if defined(BOTAN_HAS_SHACAL2_AVX2) + void avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const; + void avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const; +#endif + +#if defined(BOTAN_HAS_SHACAL2_X86) + void x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const; /* encryption only: the class declares no x86 decrypt counterpart */ +#endif + + secure_vector<uint32_t> m_RK; /* round keys; key_schedule() sizes this to 64 words */ + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/info.txt b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/info.txt new file mode 100644 index 0000000000..a0b5ce1a97 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/info.txt @@ -0,0 +1,11 @@ +<defines> +SHACAL2_AVX2 -> 20180826 +</defines> + +<isa> +avx2 +</isa> + +<requires> +simd_avx2 +</requires> diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp new file mode 100644 
index 0000000000..a465a38286 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp @@ -0,0 +1,122 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/internal/simd_avx2.h> + +namespace Botan { + +namespace { + +void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") /* forward round on SIMD_8x32 values: only D and H are modified, RK is splat across the vector */ + SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, + const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, + uint32_t RK) + { + H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK); + D += H; + H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + } + +void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2") /* undoes SHACAL2_Fwd: the same three updates subtracted in the opposite order */ + SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D, + const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H, + uint32_t RK) + { + H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + D -= H; + H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_8x32::splat(RK); + } + +} + +void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 A = SIMD_8x32::load_be(in); + SIMD_8x32 B = SIMD_8x32::load_be(in+32); + SIMD_8x32 C = SIMD_8x32::load_be(in+64); + SIMD_8x32 D = SIMD_8x32::load_be(in+96); + + SIMD_8x32 E = SIMD_8x32::load_be(in+128); + SIMD_8x32 F = SIMD_8x32::load_be(in+160); + SIMD_8x32 G = SIMD_8x32::load_be(in+192); + SIMD_8x32 H = SIMD_8x32::load_be(in+224); /* eight loads at 32-byte strides starting at in */ + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); /* presumably leaves each register holding one state word from all eight blocks (TODO confirm against SIMD_8x32::transpose) */ + + for(size_t r = 0; r != 64; r += 8) /* 64 rounds, eight per pass; the rotating argument order renames the state words instead of moving data */ + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, 
G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + A.store_be(out); + B.store_be(out+32); + C.store_be(out+64); + D.store_be(out+96); + + E.store_be(out+128); + F.store_be(out+160); + G.store_be(out+192); + H.store_be(out+224); + + SIMD_8x32::zero_registers(); + } + +BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const + { + SIMD_8x32::reset_registers(); + + SIMD_8x32 A = SIMD_8x32::load_be(in); + SIMD_8x32 B = SIMD_8x32::load_be(in+32); + SIMD_8x32 C = SIMD_8x32::load_be(in+64); + SIMD_8x32 D = SIMD_8x32::load_be(in+96); + + SIMD_8x32 E = SIMD_8x32::load_be(in+128); + SIMD_8x32 F = SIMD_8x32::load_be(in+160); + SIMD_8x32 G = SIMD_8x32::load_be(in+192); + SIMD_8x32 H = SIMD_8x32::load_be(in+224); + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) /* round keys are consumed from m_RK[63] down to m_RK[0] */ + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + SIMD_8x32::transpose(A, B, C, D, E, F, G, H); + + A.store_be(out); + B.store_be(out+32); + C.store_be(out+64); + D.store_be(out+96); + + E.store_be(out+128); + F.store_be(out+160); + G.store_be(out+192); + H.store_be(out+224); + + SIMD_8x32::zero_registers(); + } + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/info.txt b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/info.txt new file mode 100644 index 0000000000..8d715c668c --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/info.txt @@ -0,0 +1,8 @@ +<defines> +SHACAL2_SIMD -> 20170813 +</defines> + +<requires> +shacal2 +simd +</requires> 
diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp new file mode 100644 index 0000000000..6d15faf1a6 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_simd/shacal2_simd.cpp @@ -0,0 +1,119 @@ +/* +* SHACAL-2 using SIMD +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <botan/internal/simd_32.h> + +namespace Botan { + +namespace { + +inline /* forward round on SIMD_4x32 values: only D and H are modified, RK is splat across the vector */ +void SHACAL2_Fwd(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D, + const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H, + uint32_t RK) + { + H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK); + D += H; + H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + } + +inline /* undoes SHACAL2_Fwd: the same three updates subtracted in the opposite order */ +void SHACAL2_Rev(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D, + const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H, + uint32_t RK) + { + H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C)); + D -= H; + H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK); + } + +} + +void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const + { + SIMD_4x32 A = SIMD_4x32::load_be(in); + SIMD_4x32 E = SIMD_4x32::load_be(in+16); + SIMD_4x32 B = SIMD_4x32::load_be(in+32); + SIMD_4x32 F = SIMD_4x32::load_be(in+48); + + SIMD_4x32 C = SIMD_4x32::load_be(in+64); + SIMD_4x32 G = SIMD_4x32::load_be(in+80); + SIMD_4x32 D = SIMD_4x32::load_be(in+96); + SIMD_4x32 H = SIMD_4x32::load_be(in+112); /* A,B,C,D take offsets 0,32,64,96 and E,F,G,H take 16,48,80,112; the stores at the end use the same layout */ + + SIMD_4x32::transpose(A, B, C, D); /* the two register groups are transposed independently, before and after the rounds */ + SIMD_4x32::transpose(E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) /* 64 rounds, eight per pass; the rotating argument order renames the state words instead of moving data */ + { + SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]); + SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]); + SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]); + SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]); + SHACAL2_Fwd(E, F, G, H, 
A, B, C, D, m_RK[r+4]); + SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]); + SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]); + SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]); + } + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + A.store_be(out); + E.store_be(out+16); + B.store_be(out+32); + F.store_be(out+48); + + C.store_be(out+64); + G.store_be(out+80); + D.store_be(out+96); + H.store_be(out+112); + } + +void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const + { + SIMD_4x32 A = SIMD_4x32::load_be(in); + SIMD_4x32 E = SIMD_4x32::load_be(in+16); + SIMD_4x32 B = SIMD_4x32::load_be(in+32); + SIMD_4x32 F = SIMD_4x32::load_be(in+48); + + SIMD_4x32 C = SIMD_4x32::load_be(in+64); + SIMD_4x32 G = SIMD_4x32::load_be(in+80); + SIMD_4x32 D = SIMD_4x32::load_be(in+96); + SIMD_4x32 H = SIMD_4x32::load_be(in+112); + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + for(size_t r = 0; r != 64; r += 8) /* round keys are consumed from m_RK[63] down to m_RK[0] */ + { + SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]); + SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]); + SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]); + SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]); + SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]); + SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]); + SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]); + SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]); + } + + SIMD_4x32::transpose(A, B, C, D); + SIMD_4x32::transpose(E, F, G, H); + + A.store_be(out); + E.store_be(out+16); + B.store_be(out+32); + F.store_be(out+48); + + C.store_be(out+64); + G.store_be(out+80); + D.store_be(out+96); + H.store_be(out+112); + } + +} diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/info.txt b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/info.txt new file mode 100644 index 0000000000..2988330482 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/info.txt @@ -0,0 +1,20 @@ +<defines> +SHACAL2_X86 -> 20170814 +</defines> + 
+<requires> +shacal2 +</requires> + +<isa> +sha +sse2 +ssse3 +</isa> + +<cc> +gcc:5.0 +clang:3.9 +msvc:19.0 # MSVS 2015 +</cc> + diff --git a/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp new file mode 100644 index 0000000000..1611d6c9b6 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/shacal2/shacal2_x86/shacal2_x86.cpp @@ -0,0 +1,118 @@ +/* +* SHACAL-2 using x86 SHA extensions +* (C) 2017 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/shacal2.h> +#include <immintrin.h> + +namespace Botan { + +/* +Only encryption is supported since the inverse round function would +require a different instruction +*/ + +BOTAN_FUNC_ISA("sha,ssse3") +void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const + { + const __m128i MASK1 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); /* byte shuffle applied to the state just before it is stored */ + const __m128i MASK2 = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); /* byte shuffle applied to the input just after it is loaded */ + + const __m128i* RK_mm = reinterpret_cast<const __m128i*>(m_RK.data()); + const __m128i* in_mm = reinterpret_cast<const __m128i*>(in); + __m128i* out_mm = reinterpret_cast<__m128i*>(out); + + while(blocks >= 2) /* two blocks per pass, held in B0_* and B1_*; a second loop handles what is left one block at a time */ + { + __m128i B0_0 = _mm_loadu_si128(in_mm); + __m128i B0_1 = _mm_loadu_si128(in_mm+1); + __m128i B1_0 = _mm_loadu_si128(in_mm+2); + __m128i B1_1 = _mm_loadu_si128(in_mm+3); + + __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0_0, B0_1), MASK2); + B0_1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0_0, B0_1), MASK2); + B0_0 = TMP; + + TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B1_0, B1_1), MASK2); + B1_1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B1_0, B1_1), MASK2); + B1_0 = TMP; + + for(size_t i = 0; i != 8; ++i) /* every pass loads round-key words 8*i .. 8*i+7 as two vectors */ + { + const __m128i RK0 = _mm_loadu_si128(RK_mm + 2*i); + const __m128i RK2 = _mm_loadu_si128(RK_mm + 2*i+1); + const __m128i RK1 = _mm_srli_si128(RK0, 8); /* RK0 shifted right by 8 bytes, so its upper two words land in the low half */ + const __m128i RK3 = _mm_srli_si128(RK2, 8); + + B0_1 = 
_mm_sha256rnds2_epu32(B0_1, B0_0, RK0); + B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK0); + + B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK1); + B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK1); + + B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK2); + B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK2); + + B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK3); + B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK3); + } + + TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0_0, B0_1), MASK1); + B0_1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0_0, B0_1), MASK1); + B0_0 = TMP; + + TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B1_0, B1_1), MASK1); + B1_1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B1_0, B1_1), MASK1); + B1_0 = TMP; + + // Save state + _mm_storeu_si128(out_mm + 0, B0_0); + _mm_storeu_si128(out_mm + 1, B0_1); + _mm_storeu_si128(out_mm + 2, B1_0); + _mm_storeu_si128(out_mm + 3, B1_1); + + blocks -= 2; + in_mm += 4; + out_mm += 4; + } + + while(blocks) + { + __m128i B0 = _mm_loadu_si128(in_mm); + __m128i B1 = _mm_loadu_si128(in_mm+1); + + __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2); + B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2); + B0 = TMP; + + for(size_t i = 0; i != 8; ++i) + { + const __m128i RK0 = _mm_loadu_si128(RK_mm + 2*i); + const __m128i RK2 = _mm_loadu_si128(RK_mm + 2*i+1); + const __m128i RK1 = _mm_srli_si128(RK0, 8); + const __m128i RK3 = _mm_srli_si128(RK2, 8); + + B1 = _mm_sha256rnds2_epu32(B1, B0, RK0); + B0 = _mm_sha256rnds2_epu32(B0, B1, RK1); + B1 = _mm_sha256rnds2_epu32(B1, B0, RK2); + B0 = _mm_sha256rnds2_epu32(B0, B1, RK3); + } + + TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1); + B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1); + B0 = TMP; + + // Save state + _mm_storeu_si128(out_mm, B0); + _mm_storeu_si128(out_mm + 1, B1); + + blocks--; + in_mm += 2; + out_mm += 2; + } + } + +} diff --git a/comm/third_party/botan/src/lib/block/sm4/info.txt b/comm/third_party/botan/src/lib/block/sm4/info.txt new file mode 100644 index 
0000000000..32561f6d6f --- /dev/null +++ b/comm/third_party/botan/src/lib/block/sm4/info.txt @@ -0,0 +1,3 @@ +<defines> +SM4 -> 20170716 +</defines> diff --git a/comm/third_party/botan/src/lib/block/sm4/sm4.cpp b/comm/third_party/botan/src/lib/block/sm4/sm4.cpp new file mode 100644 index 0000000000..8198330e6e --- /dev/null +++ b/comm/third_party/botan/src/lib/block/sm4/sm4.cpp @@ -0,0 +1,341 @@ +/* +* SM4 +* (C) 2017 Ribose Inc +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/sm4.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/cpuid.h> + +namespace Botan { + +namespace { + +alignas(64) +const uint8_t SM4_SBOX[256] = { +0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, +0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, +0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, +0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, +0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, +0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, +0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, +0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, +0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, +0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, +0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, +0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, +0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, +0x0A, 
0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, +0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, +0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48 +}; + +/* +* SM4_SBOX_T[j] == L(SM4_SBOX[j]). +*/ +alignas(64) +const uint32_t SM4_SBOX_T[256] = { + 0x8ED55B5B, 0xD0924242, 0x4DEAA7A7, 0x06FDFBFB, 0xFCCF3333, 0x65E28787, + 0xC93DF4F4, 0x6BB5DEDE, 0x4E165858, 0x6EB4DADA, 0x44145050, 0xCAC10B0B, + 0x8828A0A0, 0x17F8EFEF, 0x9C2CB0B0, 0x11051414, 0x872BACAC, 0xFB669D9D, + 0xF2986A6A, 0xAE77D9D9, 0x822AA8A8, 0x46BCFAFA, 0x14041010, 0xCFC00F0F, + 0x02A8AAAA, 0x54451111, 0x5F134C4C, 0xBE269898, 0x6D482525, 0x9E841A1A, + 0x1E061818, 0xFD9B6666, 0xEC9E7272, 0x4A430909, 0x10514141, 0x24F7D3D3, + 0xD5934646, 0x53ECBFBF, 0xF89A6262, 0x927BE9E9, 0xFF33CCCC, 0x04555151, + 0x270B2C2C, 0x4F420D0D, 0x59EEB7B7, 0xF3CC3F3F, 0x1CAEB2B2, 0xEA638989, + 0x74E79393, 0x7FB1CECE, 0x6C1C7070, 0x0DABA6A6, 0xEDCA2727, 0x28082020, + 0x48EBA3A3, 0xC1975656, 0x80820202, 0xA3DC7F7F, 0xC4965252, 0x12F9EBEB, + 0xA174D5D5, 0xB38D3E3E, 0xC33FFCFC, 0x3EA49A9A, 0x5B461D1D, 0x1B071C1C, + 0x3BA59E9E, 0x0CFFF3F3, 0x3FF0CFCF, 0xBF72CDCD, 0x4B175C5C, 0x52B8EAEA, + 0x8F810E0E, 0x3D586565, 0xCC3CF0F0, 0x7D196464, 0x7EE59B9B, 0x91871616, + 0x734E3D3D, 0x08AAA2A2, 0xC869A1A1, 0xC76AADAD, 0x85830606, 0x7AB0CACA, + 0xB570C5C5, 0xF4659191, 0xB2D96B6B, 0xA7892E2E, 0x18FBE3E3, 0x47E8AFAF, + 0x330F3C3C, 0x674A2D2D, 0xB071C1C1, 0x0E575959, 0xE99F7676, 0xE135D4D4, + 0x661E7878, 0xB4249090, 0x360E3838, 0x265F7979, 0xEF628D8D, 0x38596161, + 0x95D24747, 0x2AA08A8A, 0xB1259494, 0xAA228888, 0x8C7DF1F1, 0xD73BECEC, + 0x05010404, 0xA5218484, 0x9879E1E1, 0x9B851E1E, 0x84D75353, 0x00000000, + 0x5E471919, 0x0B565D5D, 0xE39D7E7E, 0x9FD04F4F, 0xBB279C9C, 0x1A534949, + 0x7C4D3131, 0xEE36D8D8, 0x0A020808, 0x7BE49F9F, 0x20A28282, 0xD4C71313, + 0xE8CB2323, 0xE69C7A7A, 0x42E9ABAB, 0x43BDFEFE, 0xA2882A2A, 
0x9AD14B4B, + 0x40410101, 0xDBC41F1F, 0xD838E0E0, 0x61B7D6D6, 0x2FA18E8E, 0x2BF4DFDF, + 0x3AF1CBCB, 0xF6CD3B3B, 0x1DFAE7E7, 0xE5608585, 0x41155454, 0x25A38686, + 0x60E38383, 0x16ACBABA, 0x295C7575, 0x34A69292, 0xF7996E6E, 0xE434D0D0, + 0x721A6868, 0x01545555, 0x19AFB6B6, 0xDF914E4E, 0xFA32C8C8, 0xF030C0C0, + 0x21F6D7D7, 0xBC8E3232, 0x75B3C6C6, 0x6FE08F8F, 0x691D7474, 0x2EF5DBDB, + 0x6AE18B8B, 0x962EB8B8, 0x8A800A0A, 0xFE679999, 0xE2C92B2B, 0xE0618181, + 0xC0C30303, 0x8D29A4A4, 0xAF238C8C, 0x07A9AEAE, 0x390D3434, 0x1F524D4D, + 0x764F3939, 0xD36EBDBD, 0x81D65757, 0xB7D86F6F, 0xEB37DCDC, 0x51441515, + 0xA6DD7B7B, 0x09FEF7F7, 0xB68C3A3A, 0x932FBCBC, 0x0F030C0C, 0x03FCFFFF, + 0xC26BA9A9, 0xBA73C9C9, 0xD96CB5B5, 0xDC6DB1B1, 0x375A6D6D, 0x15504545, + 0xB98F3636, 0x771B6C6C, 0x13ADBEBE, 0xDA904A4A, 0x57B9EEEE, 0xA9DE7777, + 0x4CBEF2F2, 0x837EFDFD, 0x55114444, 0xBDDA6767, 0x2C5D7171, 0x45400505, + 0x631F7C7C, 0x50104040, 0x325B6969, 0xB8DB6363, 0x220A2828, 0xC5C20707, + 0xF531C4C4, 0xA88A2222, 0x31A79696, 0xF9CE3737, 0x977AEDED, 0x49BFF6F6, + 0x992DB4B4, 0xA475D1D1, 0x90D34343, 0x5A124848, 0x58BAE2E2, 0x71E69797, + 0x64B6D2D2, 0x70B2C2C2, 0xAD8B2626, 0xCD68A5A5, 0xCB955E5E, 0x624B2929, + 0x3C0C3030, 0xCE945A5A, 0xAB76DDDD, 0x867FF9F9, 0xF1649595, 0x5DBBE6E6, + 0x35F2C7C7, 0x2D092424, 0xD1C61717, 0xD66FB9B9, 0xDEC51B1B, 0x94861212, + 0x78186060, 0x30F3C3C3, 0x897CF5F5, 0x5CEFB3B3, 0xD23AE8E8, 0xACDF7373, + 0x794C3535, 0xA0208080, 0x9D78E5E5, 0x56EDBBBB, 0x235E7D7D, 0xC63EF8F8, + 0x8BD45F5F, 0xE7C82F2F, 0xDD39E4E4, 0x68492121 }; + +inline uint32_t SM4_T_slow(uint32_t b) + { + const uint32_t t = make_uint32(SM4_SBOX[get_byte(0,b)], + SM4_SBOX[get_byte(1,b)], + SM4_SBOX[get_byte(2,b)], + SM4_SBOX[get_byte(3,b)]); + + // L linear transform + return t ^ rotl<2>(t) ^ rotl<10>(t) ^ rotl<18>(t) ^ rotl<24>(t); + } + +inline uint32_t SM4_T(uint32_t b) + { + return SM4_SBOX_T[get_byte(0,b)] ^ + rotr< 8>(SM4_SBOX_T[get_byte(1,b)]) ^ + rotr<16>(SM4_SBOX_T[get_byte(2,b)]) ^ + 
rotr<24>(SM4_SBOX_T[get_byte(3,b)]); + } + +// Variant of T for key schedule +inline uint32_t SM4_Tp(uint32_t b) + { + const uint32_t t = make_uint32(SM4_SBOX[get_byte(0,b)], + SM4_SBOX[get_byte(1,b)], + SM4_SBOX[get_byte(2,b)], + SM4_SBOX[get_byte(3,b)]); + + // L' linear transform + return t ^ rotl<13>(t) ^ rotl<23>(t); + } + +#define SM4_E_RNDS(B, R, F) do { \ + B##0 ^= F(B##1 ^ B##2 ^ B##3 ^ m_RK[4*R+0]); \ + B##1 ^= F(B##2 ^ B##3 ^ B##0 ^ m_RK[4*R+1]); \ + B##2 ^= F(B##3 ^ B##0 ^ B##1 ^ m_RK[4*R+2]); \ + B##3 ^= F(B##0 ^ B##1 ^ B##2 ^ m_RK[4*R+3]); \ + } while(0) + +#define SM4_D_RNDS(B, R, F) do { \ + B##0 ^= F(B##1 ^ B##2 ^ B##3 ^ m_RK[4*R+3]); \ + B##1 ^= F(B##2 ^ B##3 ^ B##0 ^ m_RK[4*R+2]); \ + B##2 ^= F(B##3 ^ B##0 ^ B##1 ^ m_RK[4*R+1]); \ + B##3 ^= F(B##0 ^ B##1 ^ B##2 ^ m_RK[4*R+0]); \ + } while(0) + +} + +/* +* SM4 Encryption +*/ +void SM4::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + return sm4_armv8_encrypt(in, out, blocks); +#endif + + while(blocks >= 2) + { + uint32_t B0 = load_be<uint32_t>(in, 0); + uint32_t B1 = load_be<uint32_t>(in, 1); + uint32_t B2 = load_be<uint32_t>(in, 2); + uint32_t B3 = load_be<uint32_t>(in, 3); + + uint32_t C0 = load_be<uint32_t>(in, 4); + uint32_t C1 = load_be<uint32_t>(in, 5); + uint32_t C2 = load_be<uint32_t>(in, 6); + uint32_t C3 = load_be<uint32_t>(in, 7); + + SM4_E_RNDS(B, 0, SM4_T_slow); + SM4_E_RNDS(C, 0, SM4_T_slow); + SM4_E_RNDS(B, 1, SM4_T); + SM4_E_RNDS(C, 1, SM4_T); + SM4_E_RNDS(B, 2, SM4_T); + SM4_E_RNDS(C, 2, SM4_T); + SM4_E_RNDS(B, 3, SM4_T); + SM4_E_RNDS(C, 3, SM4_T); + SM4_E_RNDS(B, 4, SM4_T); + SM4_E_RNDS(C, 4, SM4_T); + SM4_E_RNDS(B, 5, SM4_T); + SM4_E_RNDS(C, 5, SM4_T); + SM4_E_RNDS(B, 6, SM4_T); + SM4_E_RNDS(C, 6, SM4_T); + SM4_E_RNDS(B, 7, SM4_T_slow); + SM4_E_RNDS(C, 7, SM4_T_slow); + + store_be(out, B3, B2, B1, B0, C3, C2, C1, C0); + + in += 2*BLOCK_SIZE; + out += 
2*BLOCK_SIZE; + blocks -= 2; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t B0 = load_be<uint32_t>(in, 0); + uint32_t B1 = load_be<uint32_t>(in, 1); + uint32_t B2 = load_be<uint32_t>(in, 2); + uint32_t B3 = load_be<uint32_t>(in, 3); + + SM4_E_RNDS(B, 0, SM4_T_slow); + SM4_E_RNDS(B, 1, SM4_T); + SM4_E_RNDS(B, 2, SM4_T); + SM4_E_RNDS(B, 3, SM4_T); + SM4_E_RNDS(B, 4, SM4_T); + SM4_E_RNDS(B, 5, SM4_T); + SM4_E_RNDS(B, 6, SM4_T); + SM4_E_RNDS(B, 7, SM4_T_slow); + + store_be(out, B3, B2, B1, B0); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +/* +* SM4 Decryption +*/ +void SM4::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_RK.empty() == false); + +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + return sm4_armv8_decrypt(in, out, blocks); +#endif + + while(blocks >= 2) + { + uint32_t B0 = load_be<uint32_t>(in, 0); + uint32_t B1 = load_be<uint32_t>(in, 1); + uint32_t B2 = load_be<uint32_t>(in, 2); + uint32_t B3 = load_be<uint32_t>(in, 3); + + uint32_t C0 = load_be<uint32_t>(in, 4); + uint32_t C1 = load_be<uint32_t>(in, 5); + uint32_t C2 = load_be<uint32_t>(in, 6); + uint32_t C3 = load_be<uint32_t>(in, 7); + + SM4_D_RNDS(B, 7, SM4_T_slow); + SM4_D_RNDS(C, 7, SM4_T_slow); + SM4_D_RNDS(B, 6, SM4_T); + SM4_D_RNDS(C, 6, SM4_T); + SM4_D_RNDS(B, 5, SM4_T); + SM4_D_RNDS(C, 5, SM4_T); + SM4_D_RNDS(B, 4, SM4_T); + SM4_D_RNDS(C, 4, SM4_T); + SM4_D_RNDS(B, 3, SM4_T); + SM4_D_RNDS(C, 3, SM4_T); + SM4_D_RNDS(B, 2, SM4_T); + SM4_D_RNDS(C, 2, SM4_T); + SM4_D_RNDS(B, 1, SM4_T); + SM4_D_RNDS(C, 1, SM4_T); + SM4_D_RNDS(B, 0, SM4_T_slow); + SM4_D_RNDS(C, 0, SM4_T_slow); + + store_be(out, B3, B2, B1, B0, C3, C2, C1, C0); + + in += 2*BLOCK_SIZE; + out += 2*BLOCK_SIZE; + blocks -= 2; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32_t B0 = load_be<uint32_t>(in, 0); + uint32_t B1 = load_be<uint32_t>(in, 1); + uint32_t B2 = load_be<uint32_t>(in, 2); + uint32_t B3 = load_be<uint32_t>(in, 3); + + SM4_D_RNDS(B, 7, 
SM4_T_slow); + SM4_D_RNDS(B, 6, SM4_T); + SM4_D_RNDS(B, 5, SM4_T); + SM4_D_RNDS(B, 4, SM4_T); + SM4_D_RNDS(B, 3, SM4_T); + SM4_D_RNDS(B, 2, SM4_T); + SM4_D_RNDS(B, 1, SM4_T); + SM4_D_RNDS(B, 0, SM4_T_slow); + + store_be(out, B3, B2, B1, B0); + + in += BLOCK_SIZE; + out += BLOCK_SIZE; + } + } + +#undef SM4_E_RNDS +#undef SM4_D_RNDS + +/* +* SM4 Key Schedule +*/ +void SM4::key_schedule(const uint8_t key[], size_t) + { + // System parameter or family key + const uint32_t FK[4] = { 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc }; + + const uint32_t CK[32] = { + 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269, + 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9, + 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249, + 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9, + 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229, + 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299, + 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209, + 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 + }; + + secure_vector<uint32_t> K(4); + K[0] = load_be<uint32_t>(key, 0) ^ FK[0]; + K[1] = load_be<uint32_t>(key, 1) ^ FK[1]; + K[2] = load_be<uint32_t>(key, 2) ^ FK[2]; + K[3] = load_be<uint32_t>(key, 3) ^ FK[3]; + + m_RK.resize(32); + for(size_t i = 0; i != 32; ++i) + { + K[i % 4] ^= SM4_Tp(K[(i+1)%4] ^ K[(i+2)%4] ^ K[(i+3)%4] ^ CK[i]); + m_RK[i] = K[i % 4]; + } + } + +void SM4::clear() + { + zap(m_RK); + } + +size_t SM4::parallelism() const + { +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + { + return 4; + } +#endif + + return 1; + } + +std::string SM4::provider() const + { +#if defined(BOTAN_HAS_SM4_ARMV8) + if(CPUID::has_arm_sm4()) + { + return "armv8"; + } +#endif + + return "base"; + } + +} diff --git a/comm/third_party/botan/src/lib/block/sm4/sm4.h b/comm/third_party/botan/src/lib/block/sm4/sm4.h new file mode 100644 index 0000000000..637ffd4bee --- /dev/null +++ b/comm/third_party/botan/src/lib/block/sm4/sm4.h @@ -0,0 +1,45 @@ +/* +* SM4 +* (C) 2017 Ribose Inc +* +* Botan is released under the 
Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_SM4_H_ +#define BOTAN_SM4_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(sm4.h) + +namespace Botan { + +/** +* SM4 +*/ +class BOTAN_PUBLIC_API(2,2) SM4 final : public Block_Cipher_Fixed_Params<16, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "SM4"; } + BlockCipher* clone() const override { return new SM4; } + + std::string provider() const override; + size_t parallelism() const override; + private: + void key_schedule(const uint8_t[], size_t) override; + +#if defined(BOTAN_HAS_SM4_ARMV8) + void sm4_armv8_encrypt(const uint8_t in[], uint8_t out[], size_t blocks) const; + void sm4_armv8_decrypt(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + secure_vector<uint32_t> m_RK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/sm4/sm4_armv8/info.txt b/comm/third_party/botan/src/lib/block/sm4/sm4_armv8/info.txt new file mode 100644 index 0000000000..03ff034395 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/sm4/sm4_armv8/info.txt @@ -0,0 +1,11 @@ +<defines> +SM4_ARMV8 -> 20180709 +</defines> + +<isa> +armv8sm4 +</isa> + +<cc> +gcc:8 +</cc> diff --git a/comm/third_party/botan/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp b/comm/third_party/botan/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp new file mode 100644 index 0000000000..9e7a71a102 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/sm4/sm4_armv8/sm4_armv8.cpp @@ -0,0 +1,174 @@ +/* +* (C) 2018 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/sm4.h> +#include <arm_neon.h> + +namespace Botan { + +namespace { + +static const uint8_t qswap_tbl[16] = { + 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 +}; + +static const 
uint8_t bswap_tbl[16] = { + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +}; + +inline uint32x4_t qswap_32(uint32x4_t B) + { + return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), vld1q_u8(qswap_tbl))); + } + +inline uint32x4_t bswap_32(uint32x4_t B) + { + return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B))); + } + +/* + Swap both the quad-words and bytes within each word + equivalent to return bswap_32(qswap_32(B)) +*/ +inline uint32x4_t bqswap_32(uint32x4_t B) + { + return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), vld1q_u8(bswap_tbl))); + } + +#define SM4_E(B0, B1, B2, B3, K) do { \ + B0 = vsm4eq_u32(B0, K); \ + B1 = vsm4eq_u32(B1, K); \ + B2 = vsm4eq_u32(B2, K); \ + B3 = vsm4eq_u32(B3, K); \ + } while(0) + +} + +void BOTAN_FUNC_ISA("arch=armv8.2-a+sm4") +SM4::sm4_armv8_encrypt(const uint8_t input8[], uint8_t output8[], size_t blocks) const + { + const uint32x4_t K0 = vld1q_u32(&m_RK[ 0]); + const uint32x4_t K1 = vld1q_u32(&m_RK[ 4]); + const uint32x4_t K2 = vld1q_u32(&m_RK[ 8]); + const uint32x4_t K3 = vld1q_u32(&m_RK[12]); + const uint32x4_t K4 = vld1q_u32(&m_RK[16]); + const uint32x4_t K5 = vld1q_u32(&m_RK[20]); + const uint32x4_t K6 = vld1q_u32(&m_RK[24]); + const uint32x4_t K7 = vld1q_u32(&m_RK[28]); + + const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8)); + uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8)); + + while(blocks >= 4) + { + uint32x4_t B0 = bswap_32(vld1q_u32(input32)); + uint32x4_t B1 = bswap_32(vld1q_u32(input32+4)); + uint32x4_t B2 = bswap_32(vld1q_u32(input32+8)); + uint32x4_t B3 = bswap_32(vld1q_u32(input32+12)); + + SM4_E(B0, B1, B2, B3, K0); + SM4_E(B0, B1, B2, B3, K1); + SM4_E(B0, B1, B2, B3, K2); + SM4_E(B0, B1, B2, B3, K3); + SM4_E(B0, B1, B2, B3, K4); + SM4_E(B0, B1, B2, B3, K5); + SM4_E(B0, B1, B2, B3, K6); + SM4_E(B0, B1, B2, B3, K7); + + vst1q_u32(output32 , bqswap_32(B0)); + vst1q_u32(output32+ 4, 
bqswap_32(B1)); + vst1q_u32(output32+ 8, bqswap_32(B2)); + vst1q_u32(output32+12, bqswap_32(B3)); + + input32 += 4*4; + output32 += 4*4; + blocks -= 4; + } + + for(size_t i = 0; i != blocks; ++i) + { + uint32x4_t B = bswap_32(vld1q_u32(input32)); + + B = vsm4eq_u32(B, K0); + B = vsm4eq_u32(B, K1); + B = vsm4eq_u32(B, K2); + B = vsm4eq_u32(B, K3); + B = vsm4eq_u32(B, K4); + B = vsm4eq_u32(B, K5); + B = vsm4eq_u32(B, K6); + B = vsm4eq_u32(B, K7); + + vst1q_u32(output32, bqswap_32(B)); + + input32 += 4; + output32 += 4; + } + } + +void BOTAN_FUNC_ISA("arch=armv8.2-a+sm4") +SM4::sm4_armv8_decrypt(const uint8_t input8[], uint8_t output8[], size_t blocks) const + { + const uint32x4_t K0 = qswap_32(vld1q_u32(&m_RK[ 0])); + const uint32x4_t K1 = qswap_32(vld1q_u32(&m_RK[ 4])); + const uint32x4_t K2 = qswap_32(vld1q_u32(&m_RK[ 8])); + const uint32x4_t K3 = qswap_32(vld1q_u32(&m_RK[12])); + const uint32x4_t K4 = qswap_32(vld1q_u32(&m_RK[16])); + const uint32x4_t K5 = qswap_32(vld1q_u32(&m_RK[20])); + const uint32x4_t K6 = qswap_32(vld1q_u32(&m_RK[24])); + const uint32x4_t K7 = qswap_32(vld1q_u32(&m_RK[28])); + + const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8)); + uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8)); + + while(blocks >= 4) + { + uint32x4_t B0 = bswap_32(vld1q_u32(input32)); + uint32x4_t B1 = bswap_32(vld1q_u32(input32+4)); + uint32x4_t B2 = bswap_32(vld1q_u32(input32+8)); + uint32x4_t B3 = bswap_32(vld1q_u32(input32+12)); + + SM4_E(B0, B1, B2, B3, K7); + SM4_E(B0, B1, B2, B3, K6); + SM4_E(B0, B1, B2, B3, K5); + SM4_E(B0, B1, B2, B3, K4); + SM4_E(B0, B1, B2, B3, K3); + SM4_E(B0, B1, B2, B3, K2); + SM4_E(B0, B1, B2, B3, K1); + SM4_E(B0, B1, B2, B3, K0); + + vst1q_u32(output32 , bqswap_32(B0)); + vst1q_u32(output32+ 4, bqswap_32(B1)); + vst1q_u32(output32+ 8, bqswap_32(B2)); + vst1q_u32(output32+12, bqswap_32(B3)); + + input32 += 4*4; + output32 += 4*4; + blocks -= 4; + } + + 
for(size_t i = 0; i != blocks; ++i) + { + uint32x4_t B = bswap_32(vld1q_u32(input32)); + + B = vsm4eq_u32(B, K7); + B = vsm4eq_u32(B, K6); + B = vsm4eq_u32(B, K5); + B = vsm4eq_u32(B, K4); + B = vsm4eq_u32(B, K3); + B = vsm4eq_u32(B, K2); + B = vsm4eq_u32(B, K1); + B = vsm4eq_u32(B, K0); + + vst1q_u32(output32, bqswap_32(B)); + + input32 += 4; + output32 += 4; + } + } + +#undef SM4_E + +} diff --git a/comm/third_party/botan/src/lib/block/threefish_512/info.txt b/comm/third_party/botan/src/lib/block/threefish_512/info.txt new file mode 100644 index 0000000000..e122236794 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/threefish_512/info.txt @@ -0,0 +1,3 @@ +<defines> +THREEFISH_512 -> 20131224 +</defines> diff --git a/comm/third_party/botan/src/lib/block/threefish_512/threefish.h b/comm/third_party/botan/src/lib/block/threefish_512/threefish.h new file mode 100644 index 0000000000..f866a717ff --- /dev/null +++ b/comm/third_party/botan/src/lib/block/threefish_512/threefish.h @@ -0,0 +1,17 @@ +/* +* Threefish +* (C) 2013,2014 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_THREEFISH_H_ +#define BOTAN_THREEFISH_H_ + +// This header is deprecated and will be removed in a future major release + +#include <botan/threefish_512.h> + +BOTAN_DEPRECATED_HEADER(threefish.h) + +#endif diff --git a/comm/third_party/botan/src/lib/block/threefish_512/threefish_512.cpp b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512.cpp new file mode 100644 index 0000000000..e34a9e0dca --- /dev/null +++ b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512.cpp @@ -0,0 +1,273 @@ +/* +* Threefish-512 +* (C) 2013,2014,2016 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/threefish_512.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> +#include <botan/cpuid.h> + +namespace Botan { + +#define 
THREEFISH_ROUND(X0,X1,X2,X3,X4,X5,X6,X7,ROT1,ROT2,ROT3,ROT4) \ + do { \ + X0 += X4; \ + X1 += X5; \ + X2 += X6; \ + X3 += X7; \ + X4 = rotl<ROT1>(X4); \ + X5 = rotl<ROT2>(X5); \ + X6 = rotl<ROT3>(X6); \ + X7 = rotl<ROT4>(X7); \ + X4 ^= X0; \ + X5 ^= X1; \ + X6 ^= X2; \ + X7 ^= X3; \ + } while(0) + +#define THREEFISH_INJECT_KEY(r) \ + do { \ + X0 += m_K[(r ) % 9]; \ + X1 += m_K[(r+1) % 9]; \ + X2 += m_K[(r+2) % 9]; \ + X3 += m_K[(r+3) % 9]; \ + X4 += m_K[(r+4) % 9]; \ + X5 += m_K[(r+5) % 9] + m_T[(r ) % 3]; \ + X6 += m_K[(r+6) % 9] + m_T[(r+1) % 3]; \ + X7 += m_K[(r+7) % 9] + (r); \ + } while(0) + +#define THREEFISH_ENC_8_ROUNDS(R1,R2) \ + do { \ + THREEFISH_ROUND(X0,X2,X4,X6, X1,X3,X5,X7, 46,36,19,37); \ + THREEFISH_ROUND(X2,X4,X6,X0, X1,X7,X5,X3, 33,27,14,42); \ + THREEFISH_ROUND(X4,X6,X0,X2, X1,X3,X5,X7, 17,49,36,39); \ + THREEFISH_ROUND(X6,X0,X2,X4, X1,X7,X5,X3, 44, 9,54,56); \ + THREEFISH_INJECT_KEY(R1); \ + \ + THREEFISH_ROUND(X0,X2,X4,X6, X1,X3,X5,X7, 39,30,34,24); \ + THREEFISH_ROUND(X2,X4,X6,X0, X1,X7,X5,X3, 13,50,10,17); \ + THREEFISH_ROUND(X4,X6,X0,X2, X1,X3,X5,X7, 25,29,39,43); \ + THREEFISH_ROUND(X6,X0,X2,X4, X1,X7,X5,X3, 8,35,56,22); \ + THREEFISH_INJECT_KEY(R2); \ + } while(0) + +void Threefish_512::skein_feedfwd(const secure_vector<uint64_t>& M, + const secure_vector<uint64_t>& T) + { + BOTAN_ASSERT(m_K.size() == 9, "Key was set"); + BOTAN_ASSERT(M.size() == 8, "Single block"); + + m_T[0] = T[0]; + m_T[1] = T[1]; + m_T[2] = T[0] ^ T[1]; + + uint64_t X0 = M[0]; + uint64_t X1 = M[1]; + uint64_t X2 = M[2]; + uint64_t X3 = M[3]; + uint64_t X4 = M[4]; + uint64_t X5 = M[5]; + uint64_t X6 = M[6]; + uint64_t X7 = M[7]; + + THREEFISH_INJECT_KEY(0); + + THREEFISH_ENC_8_ROUNDS(1,2); + THREEFISH_ENC_8_ROUNDS(3,4); + THREEFISH_ENC_8_ROUNDS(5,6); + THREEFISH_ENC_8_ROUNDS(7,8); + THREEFISH_ENC_8_ROUNDS(9,10); + THREEFISH_ENC_8_ROUNDS(11,12); + THREEFISH_ENC_8_ROUNDS(13,14); + THREEFISH_ENC_8_ROUNDS(15,16); + THREEFISH_ENC_8_ROUNDS(17,18); + + m_K[0] = M[0] ^ X0; + 
m_K[1] = M[1] ^ X1; + m_K[2] = M[2] ^ X2; + m_K[3] = M[3] ^ X3; + m_K[4] = M[4] ^ X4; + m_K[5] = M[5] ^ X5; + m_K[6] = M[6] ^ X6; + m_K[7] = M[7] ^ X7; + + m_K[8] = m_K[0] ^ m_K[1] ^ m_K[2] ^ m_K[3] ^ + m_K[4] ^ m_K[5] ^ m_K[6] ^ m_K[7] ^ 0x1BD11BDAA9FC1A22; + } + +size_t Threefish_512::parallelism() const + { +#if defined(BOTAN_HAS_THREEFISH_512_AVX2) + if(CPUID::has_avx2()) + { + return 2; + } +#endif + + return 1; + } + +std::string Threefish_512::provider() const + { +#if defined(BOTAN_HAS_THREEFISH_512_AVX2) + if(CPUID::has_avx2()) + { + return "avx2"; + } +#endif + + return "base"; + } + +void Threefish_512::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_K.empty() == false); + +#if defined(BOTAN_HAS_THREEFISH_512_AVX2) + if(CPUID::has_avx2()) + { + return avx2_encrypt_n(in, out, blocks); + } +#endif + + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) + { + uint64_t X0, X1, X2, X3, X4, X5, X6, X7; + load_le(in + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); + + THREEFISH_INJECT_KEY(0); + + THREEFISH_ENC_8_ROUNDS(1,2); + THREEFISH_ENC_8_ROUNDS(3,4); + THREEFISH_ENC_8_ROUNDS(5,6); + THREEFISH_ENC_8_ROUNDS(7,8); + THREEFISH_ENC_8_ROUNDS(9,10); + THREEFISH_ENC_8_ROUNDS(11,12); + THREEFISH_ENC_8_ROUNDS(13,14); + THREEFISH_ENC_8_ROUNDS(15,16); + THREEFISH_ENC_8_ROUNDS(17,18); + + store_le(out + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); + } + } + +#undef THREEFISH_ENC_8_ROUNDS +#undef THREEFISH_INJECT_KEY +#undef THREEFISH_ROUND + +void Threefish_512::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_K.empty() == false); + +#if defined(BOTAN_HAS_THREEFISH_512_AVX2) + if(CPUID::has_avx2()) + { + return avx2_decrypt_n(in, out, blocks); + } +#endif + +#define THREEFISH_ROUND(X0,X1,X2,X3,X4,X5,X6,X7,ROT1,ROT2,ROT3,ROT4) \ + do { \ + X4 ^= X0; \ + X5 ^= X1; \ + X6 ^= X2; \ + X7 ^= X3; \ + X4 = rotr<ROT1>(X4); \ + X5 = rotr<ROT2>(X5); \ + X6 = rotr<ROT3>(X6); \ + X7 = 
rotr<ROT4>(X7); \ + X0 -= X4; \ + X1 -= X5; \ + X2 -= X6; \ + X3 -= X7; \ + } while(0) + +#define THREEFISH_INJECT_KEY(r) \ + do { \ + X0 -= m_K[(r ) % 9]; \ + X1 -= m_K[(r+1) % 9]; \ + X2 -= m_K[(r+2) % 9]; \ + X3 -= m_K[(r+3) % 9]; \ + X4 -= m_K[(r+4) % 9]; \ + X5 -= m_K[(r+5) % 9] + m_T[(r ) % 3]; \ + X6 -= m_K[(r+6) % 9] + m_T[(r+1) % 3]; \ + X7 -= m_K[(r+7) % 9] + (r); \ + } while(0) + +#define THREEFISH_DEC_8_ROUNDS(R1,R2) \ + do { \ + THREEFISH_ROUND(X6,X0,X2,X4, X1,X7,X5,X3, 8,35,56,22); \ + THREEFISH_ROUND(X4,X6,X0,X2, X1,X3,X5,X7, 25,29,39,43); \ + THREEFISH_ROUND(X2,X4,X6,X0, X1,X7,X5,X3, 13,50,10,17); \ + THREEFISH_ROUND(X0,X2,X4,X6, X1,X3,X5,X7, 39,30,34,24); \ + THREEFISH_INJECT_KEY(R1); \ + \ + THREEFISH_ROUND(X6,X0,X2,X4, X1,X7,X5,X3, 44, 9,54,56); \ + THREEFISH_ROUND(X4,X6,X0,X2, X1,X3,X5,X7, 17,49,36,39); \ + THREEFISH_ROUND(X2,X4,X6,X0, X1,X7,X5,X3, 33,27,14,42); \ + THREEFISH_ROUND(X0,X2,X4,X6, X1,X3,X5,X7, 46,36,19,37); \ + THREEFISH_INJECT_KEY(R2); \ + } while(0) + + BOTAN_PARALLEL_SIMD_FOR(size_t i = 0; i < blocks; ++i) + { + uint64_t X0, X1, X2, X3, X4, X5, X6, X7; + load_le(in + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); + + THREEFISH_INJECT_KEY(18); + + THREEFISH_DEC_8_ROUNDS(17,16); + THREEFISH_DEC_8_ROUNDS(15,14); + THREEFISH_DEC_8_ROUNDS(13,12); + THREEFISH_DEC_8_ROUNDS(11,10); + THREEFISH_DEC_8_ROUNDS(9,8); + THREEFISH_DEC_8_ROUNDS(7,6); + THREEFISH_DEC_8_ROUNDS(5,4); + THREEFISH_DEC_8_ROUNDS(3,2); + THREEFISH_DEC_8_ROUNDS(1,0); + + store_le(out + BLOCK_SIZE*i, X0, X1, X2, X3, X4, X5, X6, X7); + } + +#undef THREEFISH_DEC_8_ROUNDS +#undef THREEFISH_INJECT_KEY +#undef THREEFISH_ROUND + } + +void Threefish_512::set_tweak(const uint8_t tweak[], size_t len) + { + BOTAN_ARG_CHECK(len == 16, "Threefish-512 requires 128 bit tweak"); + + m_T.resize(3); + m_T[0] = load_le<uint64_t>(tweak, 0); + m_T[1] = load_le<uint64_t>(tweak, 1); + m_T[2] = m_T[0] ^ m_T[1]; + } + +void Threefish_512::key_schedule(const uint8_t key[], size_t) + { + // todo: 
define key schedule for smaller keys + m_K.resize(9); + + for(size_t i = 0; i != 8; ++i) + m_K[i] = load_le<uint64_t>(key, i); + + m_K[8] = m_K[0] ^ m_K[1] ^ m_K[2] ^ m_K[3] ^ + m_K[4] ^ m_K[5] ^ m_K[6] ^ m_K[7] ^ 0x1BD11BDAA9FC1A22; + + // Reset tweak to all zeros on key reset + m_T.resize(3); + zeroise(m_T); + } + +void Threefish_512::clear() + { + zap(m_K); + zap(m_T); + } + +} diff --git a/comm/third_party/botan/src/lib/block/threefish_512/threefish_512.h b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512.h new file mode 100644 index 0000000000..f3adf71a92 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512.h @@ -0,0 +1,57 @@ +/* +* Threefish-512 +* (C) 2013,2014 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_THREEFISH_512_H_ +#define BOTAN_THREEFISH_512_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(threefish_512.h) + +namespace Botan { + +/** +* Threefish-512 +*/ +class BOTAN_PUBLIC_API(2,0) Threefish_512 final : + public Block_Cipher_Fixed_Params<64, 64, 0, 1, Tweakable_Block_Cipher> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void set_tweak(const uint8_t tweak[], size_t len) override; + + void clear() override; + std::string provider() const override; + std::string name() const override { return "Threefish-512"; } + BlockCipher* clone() const override { return new Threefish_512; } + size_t parallelism() const override; + + private: + +#if defined(BOTAN_HAS_THREEFISH_512_AVX2) + void avx2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; + void avx2_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const; +#endif + + void key_schedule(const uint8_t key[], size_t key_len) override; + + // Interface for Skein + friend class Skein_512; + + void skein_feedfwd(const 
secure_vector<uint64_t>& M, + const secure_vector<uint64_t>& T); + + // Private data + secure_vector<uint64_t> m_T; + secure_vector<uint64_t> m_K; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/threefish_512/threefish_512_avx2/info.txt b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512_avx2/info.txt new file mode 100644 index 0000000000..b5374c4406 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512_avx2/info.txt @@ -0,0 +1,14 @@ +<defines> +THREEFISH_512_AVX2 -> 20160903 +</defines> + +<isa> +avx2 +</isa> + +<cc> +gcc +clang +msvc +icc +</cc> diff --git a/comm/third_party/botan/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp new file mode 100644 index 0000000000..0ceea2d7f0 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/threefish_512/threefish_512_avx2/threefish_512_avx2.cpp @@ -0,0 +1,444 @@ +/* +* Threefish-512 using AVX2 +* (C) 2013,2016 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/threefish_512.h> +#include <immintrin.h> + +namespace Botan { + +namespace { + +BOTAN_FUNC_ISA("avx2") +inline void interleave_epi64(__m256i& X0, __m256i& X1) + { + // interleave X0 and X1 qwords + // (X0,X1,X2,X3),(X4,X5,X6,X7) -> (X0,X2,X4,X6),(X1,X3,X5,X7) + + const __m256i T0 = _mm256_unpacklo_epi64(X0, X1); + const __m256i T1 = _mm256_unpackhi_epi64(X0, X1); + + X0 = _mm256_permute4x64_epi64(T0, _MM_SHUFFLE(3,1,2,0)); + X1 = _mm256_permute4x64_epi64(T1, _MM_SHUFFLE(3,1,2,0)); + } + +BOTAN_FUNC_ISA("avx2") +inline void deinterleave_epi64(__m256i& X0, __m256i& X1) + { + const __m256i T0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(3,1,2,0)); + const __m256i T1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(3,1,2,0)); + + X0 = _mm256_unpacklo_epi64(T0, T1); + X1 = _mm256_unpackhi_epi64(T0, T1); + } + +BOTAN_FUNC_ISA("avx2") 
+inline void rotate_keys(__m256i& R0, __m256i& R1, __m256i R2) + { + /* + Behold. The key schedule progresses like so. The values + loop back to the originals after the rounds are complete + so we don't need to reload for starting the next block. + + R0 R1 R2 + K1,K2,K3 (7,5,3,1),(8,6,4,2),(0,7,5,3) + K3,K4,K5 (0,7,5,3),(1,8,6,4),(2,0,7,5) + K5,K6,K7 (2,0,7,5),(3,1,8,6),(4,2,0,7) + + K7,K8,K0 (4,2,0,7),(5,3,1,8),(6,4,2,0) + K0,K1,K2 (6,4,2,0),(7,5,3,1),(8,6,4,2) + K2,K3,K4 (8,6,4,2),(0,7,5,3),(1,8,6,4) + + K4,K5,K6 (1,8,6,4),(2,0,7,5),(3,1,8,6) + K6,K7,K8 (3,1,8,6),(4,2,0,7),(5,3,1,8) + K8,K0,K1 (5,3,1,8),(6,4,2,0),(7,5,3,1) + + To compute the values for the next round: + X0 is X2 from the last round + X1 becomes (X0[4],X1[1:3]) + X2 becomes (X1[4],X2[1:3]) + + Uses 3 permutes and 2 blends, is there a faster way? + */ + __m256i T0 = _mm256_permute4x64_epi64(R0, _MM_SHUFFLE(0,0,0,0)); + __m256i T1 = _mm256_permute4x64_epi64(R1, _MM_SHUFFLE(0,3,2,1)); + __m256i T2 = _mm256_permute4x64_epi64(R2, _MM_SHUFFLE(0,3,2,1)); + + R0 = _mm256_blend_epi32(T1, T0, 0xC0); + R1 = _mm256_blend_epi32(T2, T1, 0xC0); + } + + +} + +BOTAN_FUNC_ISA("avx2") +void Threefish_512::avx2_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + _mm256_zeroupper(); + + const uint64_t* K = m_K.data(); + const uint64_t* T_64 = m_T.data(); + + const __m256i ROTATE_1 = _mm256_set_epi64x(37,19,36,46); + const __m256i ROTATE_2 = _mm256_set_epi64x(42,14,27,33); + const __m256i ROTATE_3 = _mm256_set_epi64x(39,36,49,17); + const __m256i ROTATE_4 = _mm256_set_epi64x(56,54, 9,44); + const __m256i ROTATE_5 = _mm256_set_epi64x(24,34,30,39); + const __m256i ROTATE_6 = _mm256_set_epi64x(17,10,50,13); + const __m256i ROTATE_7 = _mm256_set_epi64x(43,39,29,25); + const __m256i ROTATE_8 = _mm256_set_epi64x(22,56,35, 8); + +#define THREEFISH_ROUND(X0, X1, SHL) \ + do { \ + const __m256i SHR = _mm256_sub_epi64(_mm256_set1_epi64x(64), SHL); \ + X0 = _mm256_add_epi64(X0, X1); \ + X1 = 
_mm256_or_si256(_mm256_sllv_epi64(X1, SHL), _mm256_srlv_epi64(X1, SHR)); \ + X1 = _mm256_xor_si256(X1, X0); \ + X0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(0, 3, 2, 1)); \ + X1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(1, 2, 3, 0)); \ + } while(0) + +#define THREEFISH_ROUND_2(X0, X1, X2, X3, SHL) \ + do { \ + const __m256i SHR = _mm256_sub_epi64(_mm256_set1_epi64x(64), SHL); \ + X0 = _mm256_add_epi64(X0, X1); \ + X2 = _mm256_add_epi64(X2, X3); \ + X1 = _mm256_or_si256(_mm256_sllv_epi64(X1, SHL), _mm256_srlv_epi64(X1, SHR)); \ + X3 = _mm256_or_si256(_mm256_sllv_epi64(X3, SHL), _mm256_srlv_epi64(X3, SHR)); \ + X1 = _mm256_xor_si256(X1, X0); \ + X3 = _mm256_xor_si256(X3, X2); \ + X0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(0, 3, 2, 1)); \ + X2 = _mm256_permute4x64_epi64(X2, _MM_SHUFFLE(0, 3, 2, 1)); \ + X1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(1, 2, 3, 0)); \ + X3 = _mm256_permute4x64_epi64(X3, _MM_SHUFFLE(1, 2, 3, 0)); \ + } while(0) + +#define THREEFISH_INJECT_KEY(X0, X1, R, K0, K1, T0I, T1I) \ + do { \ + const __m256i T0 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(T0I, 0, 0, 0)); \ + const __m256i T1 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(0, T1I, 0, 0)); \ + X0 = _mm256_add_epi64(X0, K0); \ + X1 = _mm256_add_epi64(X1, K1); \ + X1 = _mm256_add_epi64(X1, _mm256_set_epi64x(R,0,0,0)); \ + X0 = _mm256_add_epi64(X0, T0); \ + X1 = _mm256_add_epi64(X1, T1); \ + } while(0) + +#define THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, R, K0, K1, T0I, T1I) \ + do { \ + const __m256i T0 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(T0I, 0, 0, 0)); \ + __m256i T1 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(0, T1I, 0, 0)); \ + X0 = _mm256_add_epi64(X0, K0); \ + X2 = _mm256_add_epi64(X2, K0); \ + X1 = _mm256_add_epi64(X1, K1); \ + X3 = _mm256_add_epi64(X3, K1); \ + T1 = _mm256_add_epi64(T1, _mm256_set_epi64x(R,0,0,0)); \ + X0 = _mm256_add_epi64(X0, T0); \ + X2 = _mm256_add_epi64(X2, T0); \ + X1 = _mm256_add_epi64(X1, T1); \ + X3 = _mm256_add_epi64(X3, T1); \ + } while(0) + +#define 
THREEFISH_ENC_8_ROUNDS(X0, X1, R, K0, K1, K2, T0, T1, T2) \ + do { \ + rotate_keys(K1, K2, K0); \ + THREEFISH_ROUND(X0, X1, ROTATE_1); \ + THREEFISH_ROUND(X0, X1, ROTATE_2); \ + THREEFISH_ROUND(X0, X1, ROTATE_3); \ + THREEFISH_ROUND(X0, X1, ROTATE_4); \ + THREEFISH_INJECT_KEY(X0, X1, R, K0, K1, T0, T1); \ + \ + THREEFISH_ROUND(X0, X1, ROTATE_5); \ + THREEFISH_ROUND(X0, X1, ROTATE_6); \ + THREEFISH_ROUND(X0, X1, ROTATE_7); \ + THREEFISH_ROUND(X0, X1, ROTATE_8); \ + THREEFISH_INJECT_KEY(X0, X1, R+1, K1, K2, T2, T0); \ + } while(0) + +#define THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, R, K0, K1, K2, T0, T1, T2) \ + do { \ + rotate_keys(K1, K2, K0); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_1); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_2); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_3); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_4); \ + THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, R, K0, K1, T0, T1); \ + \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_5); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_6); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_7); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_8); \ + THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, R+1, K1, K2, T2, T0); \ + } while(0) + + __m256i K0 = _mm256_set_epi64x(K[5], K[3], K[1], K[8]); + __m256i K1 = _mm256_set_epi64x(K[6], K[4], K[2], K[0]); + __m256i K2 = _mm256_set_epi64x(K[7], K[5], K[3], K[1]); + + const __m256i* in_mm = reinterpret_cast<const __m256i*>(in); + __m256i* out_mm = reinterpret_cast<__m256i*>(out); + + while(blocks >= 2) + { + __m256i X0 = _mm256_loadu_si256(in_mm++); + __m256i X1 = _mm256_loadu_si256(in_mm++); + __m256i X2 = _mm256_loadu_si256(in_mm++); + __m256i X3 = _mm256_loadu_si256(in_mm++); + + const __m256i T = _mm256_set_epi64x(T_64[0], T_64[1], T_64[2], 0); + + interleave_epi64(X0, X1); + interleave_epi64(X2, X3); + + THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, 0, K1, K2, 2, 3); + + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 1, K2,K0,K1, 1, 2, 3); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 3, 
K1,K2,K0, 2, 3, 1); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 5, K0,K1,K2, 3, 1, 2); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 7, K2,K0,K1, 1, 2, 3); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 9, K1,K2,K0, 2, 3, 1); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 11, K0,K1,K2, 3, 1, 2); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 13, K2,K0,K1, 1, 2, 3); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 15, K1,K2,K0, 2, 3, 1); + THREEFISH_ENC_2_8_ROUNDS(X0, X1, X2, X3, 17, K0,K1,K2, 3, 1, 2); + + deinterleave_epi64(X0, X1); + deinterleave_epi64(X2, X3); + + _mm256_storeu_si256(out_mm++, X0); + _mm256_storeu_si256(out_mm++, X1); + _mm256_storeu_si256(out_mm++, X2); + _mm256_storeu_si256(out_mm++, X3); + + blocks -= 2; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m256i X0 = _mm256_loadu_si256(in_mm++); + __m256i X1 = _mm256_loadu_si256(in_mm++); + + const __m256i T = _mm256_set_epi64x(T_64[0], T_64[1], T_64[2], 0); + + interleave_epi64(X0, X1); + + THREEFISH_INJECT_KEY(X0, X1, 0, K1, K2, 2, 3); + + THREEFISH_ENC_8_ROUNDS(X0, X1, 1, K2,K0,K1, 1, 2, 3); + THREEFISH_ENC_8_ROUNDS(X0, X1, 3, K1,K2,K0, 2, 3, 1); + THREEFISH_ENC_8_ROUNDS(X0, X1, 5, K0,K1,K2, 3, 1, 2); + THREEFISH_ENC_8_ROUNDS(X0, X1, 7, K2,K0,K1, 1, 2, 3); + THREEFISH_ENC_8_ROUNDS(X0, X1, 9, K1,K2,K0, 2, 3, 1); + THREEFISH_ENC_8_ROUNDS(X0, X1, 11, K0,K1,K2, 3, 1, 2); + THREEFISH_ENC_8_ROUNDS(X0, X1, 13, K2,K0,K1, 1, 2, 3); + THREEFISH_ENC_8_ROUNDS(X0, X1, 15, K1,K2,K0, 2, 3, 1); + THREEFISH_ENC_8_ROUNDS(X0, X1, 17, K0,K1,K2, 3, 1, 2); + + deinterleave_epi64(X0, X1); + + _mm256_storeu_si256(out_mm++, X0); + _mm256_storeu_si256(out_mm++, X1); + } + + _mm256_zeroall(); + +#undef THREEFISH_ENC_8_ROUNDS +#undef THREEFISH_ROUND +#undef THREEFISH_INJECT_KEY +#undef THREEFISH_DEC_2_8_ROUNDS +#undef THREEFISH_ROUND_2 +#undef THREEFISH_INJECT_KEY_2 + } + +BOTAN_FUNC_ISA("avx2") +void Threefish_512::avx2_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + _mm256_zeroupper(); + + const uint64_t* K = 
m_K.data(); + const uint64_t* T_64 = m_T.data(); + + const __m256i ROTATE_1 = _mm256_set_epi64x(37,19,36,46); + const __m256i ROTATE_2 = _mm256_set_epi64x(42,14,27,33); + const __m256i ROTATE_3 = _mm256_set_epi64x(39,36,49,17); + const __m256i ROTATE_4 = _mm256_set_epi64x(56,54, 9,44); + const __m256i ROTATE_5 = _mm256_set_epi64x(24,34,30,39); + const __m256i ROTATE_6 = _mm256_set_epi64x(17,10,50,13); + const __m256i ROTATE_7 = _mm256_set_epi64x(43,39,29,25); + const __m256i ROTATE_8 = _mm256_set_epi64x(22,56,35, 8); + +#define THREEFISH_ROUND(X0, X1, SHR) \ + do { \ + const __m256i SHL = _mm256_sub_epi64(_mm256_set1_epi64x(64), SHR); \ + X0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(2, 1, 0, 3)); \ + X1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(1, 2, 3, 0)); \ + X1 = _mm256_xor_si256(X1, X0); \ + X1 = _mm256_or_si256(_mm256_sllv_epi64(X1, SHL), _mm256_srlv_epi64(X1, SHR)); \ + X0 = _mm256_sub_epi64(X0, X1); \ + } while(0) + +#define THREEFISH_ROUND_2(X0, X1, X2, X3, SHR) \ + do { \ + const __m256i SHL = _mm256_sub_epi64(_mm256_set1_epi64x(64), SHR); \ + X0 = _mm256_permute4x64_epi64(X0, _MM_SHUFFLE(2, 1, 0, 3)); \ + X2 = _mm256_permute4x64_epi64(X2, _MM_SHUFFLE(2, 1, 0, 3)); \ + X1 = _mm256_permute4x64_epi64(X1, _MM_SHUFFLE(1, 2, 3, 0)); \ + X3 = _mm256_permute4x64_epi64(X3, _MM_SHUFFLE(1, 2, 3, 0)); \ + X1 = _mm256_xor_si256(X1, X0); \ + X3 = _mm256_xor_si256(X3, X2); \ + X1 = _mm256_or_si256(_mm256_sllv_epi64(X1, SHL), _mm256_srlv_epi64(X1, SHR)); \ + X3 = _mm256_or_si256(_mm256_sllv_epi64(X3, SHL), _mm256_srlv_epi64(X3, SHR)); \ + X0 = _mm256_sub_epi64(X0, X1); \ + X2 = _mm256_sub_epi64(X2, X3); \ + } while(0) + +#define THREEFISH_INJECT_KEY(X0, X1, R, K0, K1, T0I, T1I) \ + do { \ + const __m256i T0 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(T0I, 0, 0, 0)); \ + const __m256i T1 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(0, T1I, 0, 0)); \ + X0 = _mm256_sub_epi64(X0, K0); \ + X1 = _mm256_sub_epi64(X1, K1); \ + X1 = _mm256_sub_epi64(X1, _mm256_set_epi64x(R, 0, 0, 
0)); \ + X0 = _mm256_sub_epi64(X0, T0); \ + X1 = _mm256_sub_epi64(X1, T1); \ + } while(0) + +#define THREEFISH_DEC_8_ROUNDS(X0, X1, R, K1, K2, K3, T0, T1, T2) \ + do { \ + THREEFISH_INJECT_KEY(X0, X1, R+1, K2, K3, T2, T0); \ + THREEFISH_ROUND(X0, X1, ROTATE_8); \ + THREEFISH_ROUND(X0, X1, ROTATE_7); \ + THREEFISH_ROUND(X0, X1, ROTATE_6); \ + THREEFISH_ROUND(X0, X1, ROTATE_5); \ + \ + THREEFISH_INJECT_KEY(X0, X1, R, K1, K2, T0, T1); \ + THREEFISH_ROUND(X0, X1, ROTATE_4); \ + THREEFISH_ROUND(X0, X1, ROTATE_3); \ + THREEFISH_ROUND(X0, X1, ROTATE_2); \ + THREEFISH_ROUND(X0, X1, ROTATE_1); \ + } while(0) + +#define THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, R, K0, K1, T0I, T1I) \ + do { \ + const __m256i T0 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(T0I, 0, 0, 0)); \ + __m256i T1 = _mm256_permute4x64_epi64(T, _MM_SHUFFLE(0, T1I, 0, 0)); \ + X0 = _mm256_sub_epi64(X0, K0); \ + X2 = _mm256_sub_epi64(X2, K0); \ + X1 = _mm256_sub_epi64(X1, K1); \ + X3 = _mm256_sub_epi64(X3, K1); \ + T1 = _mm256_add_epi64(T1, _mm256_set_epi64x(R,0,0,0)); \ + X0 = _mm256_sub_epi64(X0, T0); \ + X2 = _mm256_sub_epi64(X2, T0); \ + X1 = _mm256_sub_epi64(X1, T1); \ + X3 = _mm256_sub_epi64(X3, T1); \ + } while(0) + +#define THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, R, K1, K2, K3, T0, T1, T2) \ + do { \ + THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, R+1, K2, K3, T2, T0); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_8); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_7); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_6); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_5); \ + \ + THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, R, K1, K2, T0, T1); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_4); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_3); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_2); \ + THREEFISH_ROUND_2(X0, X1, X2, X3, ROTATE_1); \ + } while(0) + + /* + v1.0 key schedule: 9 ymm registers (only need 2 or 3) + (0,1,2,3),(4,5,6,7) [8] + then mutating with vpermq + */ + const __m256i K0 = _mm256_set_epi64x(K[6], K[4], K[2], 
K[0]); + const __m256i K1 = _mm256_set_epi64x(K[7], K[5], K[3], K[1]); + const __m256i K2 = _mm256_set_epi64x(K[8], K[6], K[4], K[2]); + const __m256i K3 = _mm256_set_epi64x(K[0], K[7], K[5], K[3]); + const __m256i K4 = _mm256_set_epi64x(K[1], K[8], K[6], K[4]); + const __m256i K5 = _mm256_set_epi64x(K[2], K[0], K[7], K[5]); + const __m256i K6 = _mm256_set_epi64x(K[3], K[1], K[8], K[6]); + const __m256i K7 = _mm256_set_epi64x(K[4], K[2], K[0], K[7]); + const __m256i K8 = _mm256_set_epi64x(K[5], K[3], K[1], K[8]); + + const __m256i* in_mm = reinterpret_cast<const __m256i*>(in); + __m256i* out_mm = reinterpret_cast<__m256i*>(out); + + while(blocks >= 2) + { + __m256i X0 = _mm256_loadu_si256(in_mm++); + __m256i X1 = _mm256_loadu_si256(in_mm++); + __m256i X2 = _mm256_loadu_si256(in_mm++); + __m256i X3 = _mm256_loadu_si256(in_mm++); + + const __m256i T = _mm256_set_epi64x(T_64[0], T_64[1], T_64[2], 0); + + interleave_epi64(X0, X1); + interleave_epi64(X2, X3); + + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 17, K8,K0,K1, 3, 1, 2); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 15, K6,K7,K8, 2, 3, 1); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 13, K4,K5,K6, 1, 2, 3); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 11, K2,K3,K4, 3, 1, 2); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 9, K0,K1,K2, 2, 3, 1); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 7, K7,K8,K0, 1, 2, 3); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 5, K5,K6,K7, 3, 1, 2); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 3, K3,K4,K5, 2, 3, 1); + THREEFISH_DEC_2_8_ROUNDS(X0, X1, X2, X3, 1, K1,K2,K3, 1, 2, 3); + + THREEFISH_INJECT_KEY_2(X0, X1, X2, X3, 0, K0, K1, 2, 3); + + deinterleave_epi64(X0, X1); + deinterleave_epi64(X2, X3); + + _mm256_storeu_si256(out_mm++, X0); + _mm256_storeu_si256(out_mm++, X1); + _mm256_storeu_si256(out_mm++, X2); + _mm256_storeu_si256(out_mm++, X3); + + blocks -= 2; + } + + for(size_t i = 0; i != blocks; ++i) + { + __m256i X0 = _mm256_loadu_si256(in_mm++); + __m256i X1 = _mm256_loadu_si256(in_mm++); + + 
const __m256i T = _mm256_set_epi64x(T_64[0], T_64[1], T_64[2], 0); + + interleave_epi64(X0, X1); + + THREEFISH_DEC_8_ROUNDS(X0, X1, 17, K8,K0,K1, 3, 1, 2); + THREEFISH_DEC_8_ROUNDS(X0, X1, 15, K6,K7,K8, 2, 3, 1); + THREEFISH_DEC_8_ROUNDS(X0, X1, 13, K4,K5,K6, 1, 2, 3); + THREEFISH_DEC_8_ROUNDS(X0, X1, 11, K2,K3,K4, 3, 1, 2); + THREEFISH_DEC_8_ROUNDS(X0, X1, 9, K0,K1,K2, 2, 3, 1); + THREEFISH_DEC_8_ROUNDS(X0, X1, 7, K7,K8,K0, 1, 2, 3); + THREEFISH_DEC_8_ROUNDS(X0, X1, 5, K5,K6,K7, 3, 1, 2); + THREEFISH_DEC_8_ROUNDS(X0, X1, 3, K3,K4,K5, 2, 3, 1); + THREEFISH_DEC_8_ROUNDS(X0, X1, 1, K1,K2,K3, 1, 2, 3); + + THREEFISH_INJECT_KEY(X0, X1, 0, K0, K1, 2, 3); + + deinterleave_epi64(X0, X1); + + _mm256_storeu_si256(out_mm++, X0); + _mm256_storeu_si256(out_mm++, X1); + } + +#undef THREEFISH_DEC_8_ROUNDS +#undef THREEFISH_ROUND +#undef THREEFISH_INJECT_KEY +#undef THREEFISH_DEC_2_8_ROUNDS +#undef THREEFISH_ROUND_2 +#undef THREEFISH_INJECT_KEY_2 + + _mm256_zeroall(); + } + +} diff --git a/comm/third_party/botan/src/lib/block/twofish/info.txt b/comm/third_party/botan/src/lib/block/twofish/info.txt new file mode 100644 index 0000000000..9febbc8dd2 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/twofish/info.txt @@ -0,0 +1,3 @@ +<defines> +TWOFISH -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/twofish/twofish.cpp b/comm/third_party/botan/src/lib/block/twofish/twofish.cpp new file mode 100644 index 0000000000..3a508dc9d5 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/twofish/twofish.cpp @@ -0,0 +1,326 @@ +/* +* Twofish +* (C) 1999-2007,2017 Jack Lloyd +* +* The key schedule implemenation is based on a public domain +* implementation by Matthew Skala +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/twofish.h> +#include <botan/loadstor.h> +#include <botan/rotate.h> + +namespace Botan { + +namespace { + +inline void TF_E(uint32_t A, uint32_t B, uint32_t& C, uint32_t& D, + uint32_t RK1, 
uint32_t RK2, + const secure_vector<uint32_t>& SB) + { + uint32_t X = SB[ get_byte(3, A)] ^ SB[256+get_byte(2, A)] ^ + SB[512+get_byte(1, A)] ^ SB[768+get_byte(0, A)]; + uint32_t Y = SB[ get_byte(0, B)] ^ SB[256+get_byte(3, B)] ^ + SB[512+get_byte(2, B)] ^ SB[768+get_byte(1, B)]; + + X += Y; + Y += X; + + X += RK1; + Y += RK2; + + C = rotr<1>(C ^ X); + D = rotl<1>(D) ^ Y; + } + +inline void TF_D(uint32_t A, uint32_t B, uint32_t& C, uint32_t& D, + uint32_t RK1, uint32_t RK2, + const secure_vector<uint32_t>& SB) + { + uint32_t X = SB[ get_byte(3, A)] ^ SB[256+get_byte(2, A)] ^ + SB[512+get_byte(1, A)] ^ SB[768+get_byte(0, A)]; + uint32_t Y = SB[ get_byte(0, B)] ^ SB[256+get_byte(3, B)] ^ + SB[512+get_byte(2, B)] ^ SB[768+get_byte(1, B)]; + + X += Y; + Y += X; + + X += RK1; + Y += RK2; + + C = rotl<1>(C) ^ X; + D = rotr<1>(D ^ Y); + } + +} + +/* +* Twofish Encryption +*/ +void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SB.empty() == false); + + while(blocks >= 2) + { + uint32_t A0, B0, C0, D0; + uint32_t A1, B1, C1, D1; + load_le(in, A0, B0, C0, D0, A1, B1, C1, D1); + + A0 ^= m_RK[0]; + A1 ^= m_RK[0]; + B0 ^= m_RK[1]; + B1 ^= m_RK[1]; + C0 ^= m_RK[2]; + C1 ^= m_RK[2]; + D0 ^= m_RK[3]; + D1 ^= m_RK[3]; + + for(size_t k = 8; k != 40; k += 4) + { + TF_E(A0, B0, C0, D0, m_RK[k+0], m_RK[k+1], m_SB); + TF_E(A1, B1, C1, D1, m_RK[k+0], m_RK[k+1], m_SB); + + TF_E(C0, D0, A0, B0, m_RK[k+2], m_RK[k+3], m_SB); + TF_E(C1, D1, A1, B1, m_RK[k+2], m_RK[k+3], m_SB); + } + + C0 ^= m_RK[4]; + C1 ^= m_RK[4]; + D0 ^= m_RK[5]; + D1 ^= m_RK[5]; + A0 ^= m_RK[6]; + A1 ^= m_RK[6]; + B0 ^= m_RK[7]; + B1 ^= m_RK[7]; + + store_le(out, C0, D0, A0, B0, C1, D1, A1, B1); + + blocks -= 2; + out += 2*BLOCK_SIZE; + in += 2*BLOCK_SIZE; + } + + if(blocks) + { + uint32_t A, B, C, D; + load_le(in, A, B, C, D); + + A ^= m_RK[0]; + B ^= m_RK[1]; + C ^= m_RK[2]; + D ^= m_RK[3]; + + for(size_t k = 8; k != 40; k += 4) + { + TF_E(A, B, C, D, m_RK[k ], 
m_RK[k+1], m_SB); + TF_E(C, D, A, B, m_RK[k+2], m_RK[k+3], m_SB); + } + + C ^= m_RK[4]; + D ^= m_RK[5]; + A ^= m_RK[6]; + B ^= m_RK[7]; + + store_le(out, C, D, A, B); + } + } + +/* +* Twofish Decryption +*/ +void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_SB.empty() == false); + + while(blocks >= 2) + { + uint32_t A0, B0, C0, D0; + uint32_t A1, B1, C1, D1; + load_le(in, A0, B0, C0, D0, A1, B1, C1, D1); + + A0 ^= m_RK[4]; + A1 ^= m_RK[4]; + B0 ^= m_RK[5]; + B1 ^= m_RK[5]; + C0 ^= m_RK[6]; + C1 ^= m_RK[6]; + D0 ^= m_RK[7]; + D1 ^= m_RK[7]; + + for(size_t k = 40; k != 8; k -= 4) + { + TF_D(A0, B0, C0, D0, m_RK[k-2], m_RK[k-1], m_SB); + TF_D(A1, B1, C1, D1, m_RK[k-2], m_RK[k-1], m_SB); + + TF_D(C0, D0, A0, B0, m_RK[k-4], m_RK[k-3], m_SB); + TF_D(C1, D1, A1, B1, m_RK[k-4], m_RK[k-3], m_SB); + } + + C0 ^= m_RK[0]; + C1 ^= m_RK[0]; + D0 ^= m_RK[1]; + D1 ^= m_RK[1]; + A0 ^= m_RK[2]; + A1 ^= m_RK[2]; + B0 ^= m_RK[3]; + B1 ^= m_RK[3]; + + store_le(out, C0, D0, A0, B0, C1, D1, A1, B1); + + blocks -= 2; + out += 2*BLOCK_SIZE; + in += 2*BLOCK_SIZE; + } + + if(blocks) + { + uint32_t A, B, C, D; + load_le(in, A, B, C, D); + + A ^= m_RK[4]; + B ^= m_RK[5]; + C ^= m_RK[6]; + D ^= m_RK[7]; + + for(size_t k = 40; k != 8; k -= 4) + { + TF_D(A, B, C, D, m_RK[k-2], m_RK[k-1], m_SB); + TF_D(C, D, A, B, m_RK[k-4], m_RK[k-3], m_SB); + } + + C ^= m_RK[0]; + D ^= m_RK[1]; + A ^= m_RK[2]; + B ^= m_RK[3]; + + store_le(out, C, D, A, B); + } + } + +/* +* Twofish Key Schedule +*/ +void Twofish::key_schedule(const uint8_t key[], size_t length) + { + m_SB.resize(1024); + m_RK.resize(40); + + secure_vector<uint8_t> S(16); + + for(size_t i = 0; i != length; ++i) + { + /* + * Do one column of the RS matrix multiplcation + */ + if(key[i]) + { + uint8_t X = POLY_TO_EXP[key[i] - 1]; + + uint8_t RS1 = RS[(4*i ) % 32]; + uint8_t RS2 = RS[(4*i+1) % 32]; + uint8_t RS3 = RS[(4*i+2) % 32]; + uint8_t RS4 = RS[(4*i+3) % 32]; + + S[4*(i/8) ] ^= EXP_TO_POLY[(X 
+ POLY_TO_EXP[RS1 - 1]) % 255]; + S[4*(i/8)+1] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS2 - 1]) % 255]; + S[4*(i/8)+2] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS3 - 1]) % 255]; + S[4*(i/8)+3] ^= EXP_TO_POLY[(X + POLY_TO_EXP[RS4 - 1]) % 255]; + } + } + + if(length == 16) + { + for(size_t i = 0; i != 256; ++i) + { + m_SB[ i] = MDS0[Q0[Q0[i]^S[ 0]]^S[ 4]]; + m_SB[256+i] = MDS1[Q0[Q1[i]^S[ 1]]^S[ 5]]; + m_SB[512+i] = MDS2[Q1[Q0[i]^S[ 2]]^S[ 6]]; + m_SB[768+i] = MDS3[Q1[Q1[i]^S[ 3]]^S[ 7]]; + } + + for(size_t i = 0; i < 40; i += 2) + { + uint32_t X = MDS0[Q0[Q0[i ]^key[ 8]]^key[ 0]] ^ + MDS1[Q0[Q1[i ]^key[ 9]]^key[ 1]] ^ + MDS2[Q1[Q0[i ]^key[10]]^key[ 2]] ^ + MDS3[Q1[Q1[i ]^key[11]]^key[ 3]]; + uint32_t Y = MDS0[Q0[Q0[i+1]^key[12]]^key[ 4]] ^ + MDS1[Q0[Q1[i+1]^key[13]]^key[ 5]] ^ + MDS2[Q1[Q0[i+1]^key[14]]^key[ 6]] ^ + MDS3[Q1[Q1[i+1]^key[15]]^key[ 7]]; + Y = rotl<8>(Y); + X += Y; Y += X; + + m_RK[i] = X; + m_RK[i+1] = rotl<9>(Y); + } + } + else if(length == 24) + { + for(size_t i = 0; i != 256; ++i) + { + m_SB[ i] = MDS0[Q0[Q0[Q1[i]^S[ 0]]^S[ 4]]^S[ 8]]; + m_SB[256+i] = MDS1[Q0[Q1[Q1[i]^S[ 1]]^S[ 5]]^S[ 9]]; + m_SB[512+i] = MDS2[Q1[Q0[Q0[i]^S[ 2]]^S[ 6]]^S[10]]; + m_SB[768+i] = MDS3[Q1[Q1[Q0[i]^S[ 3]]^S[ 7]]^S[11]]; + } + + for(size_t i = 0; i < 40; i += 2) + { + uint32_t X = MDS0[Q0[Q0[Q1[i ]^key[16]]^key[ 8]]^key[ 0]] ^ + MDS1[Q0[Q1[Q1[i ]^key[17]]^key[ 9]]^key[ 1]] ^ + MDS2[Q1[Q0[Q0[i ]^key[18]]^key[10]]^key[ 2]] ^ + MDS3[Q1[Q1[Q0[i ]^key[19]]^key[11]]^key[ 3]]; + uint32_t Y = MDS0[Q0[Q0[Q1[i+1]^key[20]]^key[12]]^key[ 4]] ^ + MDS1[Q0[Q1[Q1[i+1]^key[21]]^key[13]]^key[ 5]] ^ + MDS2[Q1[Q0[Q0[i+1]^key[22]]^key[14]]^key[ 6]] ^ + MDS3[Q1[Q1[Q0[i+1]^key[23]]^key[15]]^key[ 7]]; + Y = rotl<8>(Y); + X += Y; Y += X; + + m_RK[i] = X; + m_RK[i+1] = rotl<9>(Y); + } + } + else if(length == 32) + { + for(size_t i = 0; i != 256; ++i) + { + m_SB[ i] = MDS0[Q0[Q0[Q1[Q1[i]^S[ 0]]^S[ 4]]^S[ 8]]^S[12]]; + m_SB[256+i] = MDS1[Q0[Q1[Q1[Q0[i]^S[ 1]]^S[ 5]]^S[ 9]]^S[13]]; + m_SB[512+i] = 
MDS2[Q1[Q0[Q0[Q0[i]^S[ 2]]^S[ 6]]^S[10]]^S[14]]; + m_SB[768+i] = MDS3[Q1[Q1[Q0[Q1[i]^S[ 3]]^S[ 7]]^S[11]]^S[15]]; + } + + for(size_t i = 0; i < 40; i += 2) + { + uint32_t X = MDS0[Q0[Q0[Q1[Q1[i ]^key[24]]^key[16]]^key[ 8]]^key[ 0]] ^ + MDS1[Q0[Q1[Q1[Q0[i ]^key[25]]^key[17]]^key[ 9]]^key[ 1]] ^ + MDS2[Q1[Q0[Q0[Q0[i ]^key[26]]^key[18]]^key[10]]^key[ 2]] ^ + MDS3[Q1[Q1[Q0[Q1[i ]^key[27]]^key[19]]^key[11]]^key[ 3]]; + uint32_t Y = MDS0[Q0[Q0[Q1[Q1[i+1]^key[28]]^key[20]]^key[12]]^key[ 4]] ^ + MDS1[Q0[Q1[Q1[Q0[i+1]^key[29]]^key[21]]^key[13]]^key[ 5]] ^ + MDS2[Q1[Q0[Q0[Q0[i+1]^key[30]]^key[22]]^key[14]]^key[ 6]] ^ + MDS3[Q1[Q1[Q0[Q1[i+1]^key[31]]^key[23]]^key[15]]^key[ 7]]; + Y = rotl<8>(Y); + X += Y; Y += X; + + m_RK[i] = X; + m_RK[i+1] = rotl<9>(Y); + } + } + } + +/* +* Clear memory of sensitive data +*/ +void Twofish::clear() + { + zap(m_SB); + zap(m_RK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/twofish/twofish.h b/comm/third_party/botan/src/lib/block/twofish/twofish.h new file mode 100644 index 0000000000..027e2c7011 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/twofish/twofish.h @@ -0,0 +1,47 @@ +/* +* Twofish +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_TWOFISH_H_ +#define BOTAN_TWOFISH_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(twofish.h) + +namespace Botan { + +/** +* Twofish, an AES finalist +*/ +class BOTAN_PUBLIC_API(2,0) Twofish final : public Block_Cipher_Fixed_Params<16, 16, 32, 8> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "Twofish"; } + BlockCipher* clone() const override { return new Twofish; } + private: + void key_schedule(const uint8_t[], size_t) override; + + static const uint32_t MDS0[256]; + static const 
uint32_t MDS1[256]; + static const uint32_t MDS2[256]; + static const uint32_t MDS3[256]; + static const uint8_t Q0[256]; + static const uint8_t Q1[256]; + static const uint8_t RS[32]; + static const uint8_t EXP_TO_POLY[255]; + static const uint8_t POLY_TO_EXP[255]; + + secure_vector<uint32_t> m_SB, m_RK; + }; + +} + +#endif diff --git a/comm/third_party/botan/src/lib/block/twofish/twofish_tab.cpp b/comm/third_party/botan/src/lib/block/twofish/twofish_tab.cpp new file mode 100644 index 0000000000..acdb35560a --- /dev/null +++ b/comm/third_party/botan/src/lib/block/twofish/twofish_tab.cpp @@ -0,0 +1,293 @@ +/* +* S-Box and MDS Tables for Twofish +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/twofish.h> + +namespace Botan { + +alignas(64) const uint8_t Twofish::Q0[256] = { + 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, + 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, + 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, + 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, + 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, + 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, + 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, + 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, + 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, + 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, + 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, + 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, + 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, + 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, + 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, + 0xFC, 0x97, 0x05, 0x7A, 
0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, + 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, + 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, + 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, + 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, + 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, + 0x4A, 0x5E, 0xC1, 0xE0 }; + +alignas(64) const uint8_t Twofish::Q1[256] = { + 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, + 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, + 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, + 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, + 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, + 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, + 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, + 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, + 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, + 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, + 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, + 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, + 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, + 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, + 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, + 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, + 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, + 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, + 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, + 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, + 0xD7, 0x61, 0x1E, 
0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, + 0x55, 0x09, 0xBE, 0x91 }; + +alignas(64) const uint8_t Twofish::RS[32] = { + 0x01, 0xA4, 0x02, 0xA4, 0xA4, 0x56, 0xA1, 0x55, 0x55, 0x82, 0xFC, 0x87, + 0x87, 0xF3, 0xC1, 0x5A, 0x5A, 0x1E, 0x47, 0x58, 0x58, 0xC6, 0xAE, 0xDB, + 0xDB, 0x68, 0x3D, 0x9E, 0x9E, 0xE5, 0x19, 0x03 }; + +alignas(64) const uint8_t Twofish::EXP_TO_POLY[255] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, + 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, + 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, + 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, + 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, + 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, + 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, + 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, + 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, + 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, + 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, + 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, + 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, + 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, + 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, + 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, + 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, + 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, + 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, + 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, + 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, + 0x8F, 0x53, 0xA6 }; + +alignas(64) const uint8_t 
Twofish::POLY_TO_EXP[255] = { + 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, + 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, + 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, + 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, + 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, + 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, + 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, + 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, + 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, + 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, + 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, + 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, + 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, + 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, + 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, + 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, + 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, + 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, + 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, + 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, + 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, + 0x85, 0xC8, 0xA1 }; + +alignas(64) const uint32_t Twofish::MDS0[256] = { + 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, + 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, + 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, + 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, + 0x24243C30, 0x5151E20F, 0xBABAC6F8, 
0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, + 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, + 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, + 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, + 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, + 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, + 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, + 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, + 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, + 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, + 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, + 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, + 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, + 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, + 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, + 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, + 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, + 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, + 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, + 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, + 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, + 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, + 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, + 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, + 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, + 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, + 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, + 0xF2F2BE98, 0x3333AC57, 0x17179067, 
0x05058E7F, 0xE8E85E05, 0x4F4F7D64, + 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, + 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, + 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, + 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, + 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, + 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, + 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, + 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, + 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, + 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, + 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 }; + +alignas(64) const uint32_t Twofish::MDS1[256] = { + 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, + 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, + 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, + 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, + 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, + 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, + 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, + 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, + 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, + 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, + 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, + 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, + 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, + 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, + 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, + 
0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, + 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, + 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, + 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, + 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, + 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, + 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, + 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, + 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, + 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, + 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, + 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, + 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, + 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, + 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, + 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, + 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, + 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, + 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, + 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, + 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, + 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, + 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, + 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, + 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, + 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, + 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, + 
0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 }; + +alignas(64) const uint32_t Twofish::MDS2[256] = { + 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, + 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, + 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, + 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, + 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, + 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, + 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, + 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, + 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, + 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, + 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, + 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, + 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, + 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, + 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, + 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, + 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, + 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, + 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, + 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, + 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, + 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, + 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, + 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, + 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, + 0x812A8194, 0x91499101, 0x0F810FFB, 
0xEE88EEAA, 0x16EE1661, 0xD721D773, + 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, + 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, + 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, + 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, + 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, + 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, + 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, + 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, + 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, + 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, + 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, + 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, + 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, + 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, + 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, + 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, + 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF }; + +alignas(64) const uint32_t Twofish::MDS3[256] = { + 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, + 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, + 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, + 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, + 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, + 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, + 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, + 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, + 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, + 
0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, + 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, + 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, + 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, + 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, + 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, + 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, + 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, + 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, + 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, + 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, + 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, + 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, + 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, + 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, + 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, + 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, + 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, + 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, + 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, + 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, + 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, + 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, + 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, + 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, + 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, + 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, + 
0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, + 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, + 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, + 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, + 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, + 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, + 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 }; + +} diff --git a/comm/third_party/botan/src/lib/block/xtea/info.txt b/comm/third_party/botan/src/lib/block/xtea/info.txt new file mode 100644 index 0000000000..b9b9ad3652 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/xtea/info.txt @@ -0,0 +1,3 @@ +<defines> +XTEA -> 20131128 +</defines> diff --git a/comm/third_party/botan/src/lib/block/xtea/xtea.cpp b/comm/third_party/botan/src/lib/block/xtea/xtea.cpp new file mode 100644 index 0000000000..7d815529ff --- /dev/null +++ b/comm/third_party/botan/src/lib/block/xtea/xtea.cpp @@ -0,0 +1,134 @@ +/* +* XTEA +* (C) 1999-2009,2016 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#include <botan/xtea.h> +#include <botan/loadstor.h> + +namespace Botan { + +/* +* XTEA Encryption +*/ +void XTEA::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + const uint32_t* EK = &m_EK[0]; + + const size_t blocks4 = blocks / 4; + const size_t blocks_left = blocks % 4; + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++) + { + uint32_t L0, R0, L1, R1, L2, R2, L3, R3; + load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); + + for(size_t r = 0; r != 32; ++r) + { + L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[2*r]; + L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[2*r]; + L2 += (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[2*r]; + L3 += (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[2*r]; + + R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[2*r+1]; + R1 += (((L1 << 4) ^ 
(L1 >> 5)) + L1) ^ EK[2*r+1]; + R2 += (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[2*r+1]; + R3 += (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[2*r+1]; + } + + store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); + } + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i) + { + uint32_t L, R; + load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R); + + for(size_t r = 0; r != 32; ++r) + { + L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*r]; + R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*r+1]; + } + + store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R); + } + } + +/* +* XTEA Decryption +*/ +void XTEA::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const + { + verify_key_set(m_EK.empty() == false); + + const uint32_t* EK = &m_EK[0]; + + const size_t blocks4 = blocks / 4; + const size_t blocks_left = blocks % 4; + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++) + { + uint32_t L0, R0, L1, R1, L2, R2, L3, R3; + load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); + + for(size_t r = 0; r != 32; ++r) + { + R0 -= (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[63 - 2*r]; + R1 -= (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[63 - 2*r]; + R2 -= (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[63 - 2*r]; + R3 -= (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[63 - 2*r]; + + L0 -= (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[62 - 2*r]; + L1 -= (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[62 - 2*r]; + L2 -= (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[62 - 2*r]; + L3 -= (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[62 - 2*r]; + } + + store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3); + } + + BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i) + { + uint32_t L, R; + load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R); + + for(size_t r = 0; r != 32; ++r) + { + R -= (((L << 4) ^ (L >> 5)) + L) ^ m_EK[63 - 2*r]; + L -= (((R << 4) ^ (R >> 5)) + R) ^ m_EK[62 - 2*r]; + } + + store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R); + } + } + +/* +* XTEA Key Schedule +*/ +void XTEA::key_schedule(const uint8_t key[], size_t) + { + m_EK.resize(64); + + 
secure_vector<uint32_t> UK(4); + for(size_t i = 0; i != 4; ++i) + UK[i] = load_be<uint32_t>(key, i); + + uint32_t D = 0; + for(size_t i = 0; i != 64; i += 2) + { + m_EK[i ] = D + UK[D % 4]; + D += 0x9E3779B9; + m_EK[i+1] = D + UK[(D >> 11) % 4]; + } + } + +void XTEA::clear() + { + zap(m_EK); + } + +} diff --git a/comm/third_party/botan/src/lib/block/xtea/xtea.h b/comm/third_party/botan/src/lib/block/xtea/xtea.h new file mode 100644 index 0000000000..bae0bc7e63 --- /dev/null +++ b/comm/third_party/botan/src/lib/block/xtea/xtea.h @@ -0,0 +1,37 @@ +/* +* XTEA +* (C) 1999-2007 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +#ifndef BOTAN_XTEA_H_ +#define BOTAN_XTEA_H_ + +#include <botan/block_cipher.h> + +BOTAN_FUTURE_INTERNAL_HEADER(xtea.h) + +namespace Botan { + +/** +* XTEA +*/ +class BOTAN_PUBLIC_API(2,0) XTEA final : public Block_Cipher_Fixed_Params<8, 16> + { + public: + void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override; + + void clear() override; + std::string name() const override { return "XTEA"; } + BlockCipher* clone() const override { return new XTEA; } + + private: + void key_schedule(const uint8_t[], size_t) override; + secure_vector<uint32_t> m_EK; + }; + +} + +#endif |