summaryrefslogtreecommitdiffstats
path: root/src/libcryptobox
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
commit: 133a45c109da5310add55824db21af5239951f93 (patch)
tree: ba6ac4c0a950a0dda56451944315d66409923918 /src/libcryptobox
parent: Initial commit. (diff)
download: rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
download: rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- src/libcryptobox/AsmOpt.cmake | 0
-rw-r--r-- src/libcryptobox/CMakeLists.txt | 41
-rw-r--r-- src/libcryptobox/base64/avx2.c | 287
-rw-r--r-- src/libcryptobox/base64/base64.c | 445
-rw-r--r-- src/libcryptobox/base64/base64.h | 31
-rw-r--r-- src/libcryptobox/base64/ref.c | 241
-rw-r--r-- src/libcryptobox/base64/sse42.c | 268
-rw-r--r-- src/libcryptobox/catena/LICENSE | 20
-rw-r--r-- src/libcryptobox/catena/README.md | 18
-rw-r--r-- src/libcryptobox/catena/catena.c | 444
-rw-r--r-- src/libcryptobox/catena/catena.h | 62
-rw-r--r-- src/libcryptobox/chacha20/avx.S | 614
-rw-r--r-- src/libcryptobox/chacha20/avx2.S | 1018
-rw-r--r-- src/libcryptobox/chacha20/chacha.c | 262
-rw-r--r-- src/libcryptobox/chacha20/chacha.h | 87
-rw-r--r-- src/libcryptobox/chacha20/constants.S | 6
-rw-r--r-- src/libcryptobox/chacha20/ref.c | 272
-rw-r--r-- src/libcryptobox/chacha20/sse2.S | 734
-rw-r--r-- src/libcryptobox/cryptobox.c | 1778
-rw-r--r-- src/libcryptobox/cryptobox.h | 437
-rw-r--r-- src/libcryptobox/keypair.c | 1021
-rw-r--r-- src/libcryptobox/keypair.h | 317
-rw-r--r-- src/libcryptobox/keypair_private.h | 143
-rw-r--r-- src/libcryptobox/keypairs_cache.c | 141
-rw-r--r-- src/libcryptobox/keypairs_cache.h | 57
-rw-r--r-- src/libcryptobox/macro.S | 176
-rw-r--r-- src/libcryptobox/platform_config.h.in | 16
27 files changed, 8936 insertions, 0 deletions
diff --git a/src/libcryptobox/AsmOpt.cmake b/src/libcryptobox/AsmOpt.cmake
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/libcryptobox/AsmOpt.cmake
diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt
new file mode 100644
index 0000000..d1c8e3d
--- /dev/null
+++ b/src/libcryptobox/CMakeLists.txt
@@ -0,0 +1,41 @@
+SET(CHACHASRC ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/chacha.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/ref.c)
+
+SET(BASE64SRC ${CMAKE_CURRENT_SOURCE_DIR}/base64/ref.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/base64/base64.c)
+
+IF (HAVE_AVX2)
+ IF ("${ARCH}" STREQUAL "x86_64")
+ SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx2.S)
+ MESSAGE(STATUS "Cryptobox: AVX2 support is added (chacha20)")
+ ENDIF ()
+ SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/avx2.c)
+ MESSAGE(STATUS "Cryptobox: AVX2 support is added (base64)")
+ENDIF (HAVE_AVX2)
+IF (HAVE_AVX)
+ IF ("${ARCH}" STREQUAL "x86_64")
+ SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S)
+ MESSAGE(STATUS "Cryptobox: AVX support is added (chacha20)")
+ ENDIF ()
+ENDIF (HAVE_AVX)
+IF (HAVE_SSE2)
+ IF ("${ARCH}" STREQUAL "x86_64")
+ SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S)
+ MESSAGE(STATUS "Cryptobox: SSE2 support is added (chacha20)")
+ ENDIF ()
+ENDIF (HAVE_SSE2)
+IF (HAVE_SSE42)
+ IF ("${ARCH}" STREQUAL "x86_64")
+ SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/sse42.c)
+ MESSAGE(STATUS "Cryptobox: SSE42 support is added (base64)")
+ ENDIF ()
+ENDIF (HAVE_SSE42)
+
+CONFIGURE_FILE(platform_config.h.in platform_config.h)
+INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
+SET(LIBCRYPTOBOXSRC ${CMAKE_CURRENT_SOURCE_DIR}/cryptobox.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/keypair.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/keypairs_cache.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/catena/catena.c)
+
+SET(RSPAMD_CRYPTOBOX ${LIBCRYPTOBOXSRC} ${CHACHASRC} ${BASE64SRC} PARENT_SCOPE)
diff --git a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c
new file mode 100644
index 0000000..38abffc
--- /dev/null
+++ b/src/libcryptobox/base64/avx2.c
@@ -0,0 +1,287 @@
+/*-
+ * Copyright 2018 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2018, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#endif
+#ifndef __SSE2__
+#define __SSE2__
+#endif
+#ifndef __SSE__
+#define __SSE__
+#endif
+#ifndef __SSE4_2__
+#define __SSE4_2__
+#endif
+#ifndef __SSE4_1__
+#define __SSE4_1__
+#endif
+#ifndef __SSSE3__
+#define __SSSE3__
+#endif
+#ifndef __AVX__
+#define __AVX__
+#endif
+#ifndef __AVX2__
+#define __AVX2__
+#endif
+
+#include <immintrin.h>
+
+#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
+#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
+#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n))
+#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) -1))
+
+static inline __m256i
+dec_reshuffle(__m256i in) __attribute__((__target__("avx2")));
+
+static inline __m256i
+dec_reshuffle(__m256i in)
+{
+ // in, lower lane, bits, upper case are most significant bits, lower case are least significant bits:
+ // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+ // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+ // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+ // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+
+ const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
+ // 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+ // 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+ // 0000eeee FFffffff 0000DDDD DDddEEEE
+ // 0000bbbb CCcccccc 0000AAAA AAaaBBBB
+
+ __m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
+ // 00000000 JJJJJJjj KKKKkkkk LLllllll
+ // 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+ // 00000000 DDDDDDdd EEEEeeee FFffffff
+ // 00000000 AAAAAAaa BBBBbbbb CCcccccc
+
+ // Pack bytes together in each lane:
+ out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
+ // 00000000 00000000 00000000 00000000
+ // LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+ // HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+ // DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
+
+ // Pack lanes
+ return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
+}
+
+
+#define INNER_LOOP_AVX2 \
+ while (inlen >= 45) { \
+ __m256i str = _mm256_loadu_si256((__m256i *) c); \
+ const __m256i lut_lo = _mm256_setr_epi8( \
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
+ const __m256i lut_hi = _mm256_setr_epi8( \
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
+ const __m256i lut_roll = _mm256_setr_epi8( \
+ 0, 16, 19, 4, -65, -65, -71, -71, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 16, 19, 4, -65, -65, -71, -71, \
+ 0, 0, 0, 0, 0, 0, 0, 0); \
+ const __m256i mask_2F = _mm256_set1_epi8(0x2f); \
+ const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \
+ const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \
+ const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \
+ const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \
+ const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \
+ const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \
+ if (!_mm256_testz_si256(lo, hi)) { \
+ seen_error = true; \
+ break; \
+ } \
+ str = _mm256_add_epi8(str, roll); \
+ str = dec_reshuffle(str); \
+ _mm256_storeu_si256((__m256i *) o, str); \
+ c += 32; \
+ o += 24; \
+ outl += 24; \
+ inlen -= 32; \
+ }
+
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
+{
+ ssize_t ret = 0;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
+ uint8_t q, carry;
+ size_t outl = 0;
+ size_t leftover = 0;
+ bool seen_error = false;
+
+repeat:
+ switch (leftover) {
+ for (;;) {
+ case 0:
+ if (G_LIKELY(!seen_error)) {
+ INNER_LOOP_AVX2
+ }
+
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ carry = q << 2;
+ leftover++;
+
+ case 1:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ *o++ = carry | (q >> 4);
+ carry = q << 4;
+ leftover++;
+ outl++;
+
+ case 2:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ leftover++;
+
+ if (q == 254) {
+ if (inlen-- != 0) {
+ leftover = 0;
+ q = base64_table_dec[*c++];
+ ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+ break;
+ }
+ else {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ leftover--;
+ }
+ /* If we get here, there was an error: */
+ break;
+ }
+ *o++ = carry | (q >> 2);
+ carry = q << 6;
+ leftover++;
+ outl++;
+
+ case 3:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ /*
+ * When q == 254, the input char is '='. Return 1 and EOF.
+ * When q == 255, the input char is invalid. Return 0 and EOF.
+ */
+ if (q == 254 && inlen == 0) {
+ ret = 1;
+ leftover = 0;
+ }
+ else {
+ ret = 0;
+ }
+
+ break;
+ }
+
+ *o++ = carry | q;
+ carry = 0;
+ leftover = 0;
+ outl++;
+ }
+ }
+
+ if (!ret && inlen > 0) {
+ /* Skip to the next valid character in input */
+ while (inlen > 0 && base64_table_dec[*c] >= 254) {
+ c++;
+ inlen--;
+ }
+
+ if (inlen > 0) {
+ seen_error = false;
+ goto repeat;
+ }
+ }
+
+ *outlen = outl;
+
+ return ret;
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC pop_options
+#endif
+#endif
diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c
new file mode 100644
index 0000000..e868924
--- /dev/null
+++ b/src/libcryptobox/base64/base64.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+#include "base64.h"
+#include "platform_config.h"
+#include "str_util.h"
+#include "util.h"
+#include "contrib/libottery/ottery.h"
+
+extern unsigned cpu_config;
+const uint8_t
+ base64_table_dec[256] =
+ {
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 62,
+ 255,
+ 255,
+ 255,
+ 63,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 255,
+ 255,
+ 255,
+ 254,
+ 255,
+ 255,
+ 255,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+ 255,
+};
+
+static const char base64_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+typedef struct base64_impl {
+ unsigned short enabled;
+ unsigned short min_len;
+ unsigned int cpu_flags;
+ const char *desc;
+ int (*decode)(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen);
+} base64_impl_t;
+
+#define BASE64_DECLARE(ext) \
+ int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen);
+#define BASE64_IMPL(cpuflags, min_len, desc, ext) \
+ { \
+ 0, (min_len), (cpuflags), desc, base64_decode_##ext \
+ }
+
+BASE64_DECLARE(ref);
+#define BASE64_REF BASE64_IMPL(0, 0, "ref", ref)
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(HAVE_SSE42) && defined(__x86_64__)
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+
+BASE64_DECLARE(sse42);
+#define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42)
+#endif
+#endif
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(HAVE_AVX2) && defined(__x86_64__)
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
+
+BASE64_DECLARE(avx2);
+#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2)
+#endif
+#endif
+
+static base64_impl_t base64_list[] = {
+ BASE64_REF,
+#ifdef BASE64_SSE42
+ BASE64_SSE42,
+#endif
+#ifdef BASE64_AVX2
+ BASE64_AVX2,
+#endif
+};
+
+static const base64_impl_t *base64_ref = &base64_list[0];
+
+const char *
+base64_load(void)
+{
+ guint i;
+ const base64_impl_t *opt_impl = base64_ref;
+
+ /* Enable reference */
+ base64_list[0].enabled = true;
+
+ if (cpu_config != 0) {
+ for (i = 1; i < G_N_ELEMENTS(base64_list); i++) {
+ if (base64_list[i].cpu_flags & cpu_config) {
+ base64_list[i].enabled = true;
+ opt_impl = &base64_list[i];
+ }
+ }
+ }
+
+
+ return opt_impl->desc;
+}
+
+gboolean
+rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen,
+ guchar *out, gsize *outlen)
+{
+ const base64_impl_t *opt_impl = base64_ref;
+
+ for (gint i = G_N_ELEMENTS(base64_list) - 1; i > 0; i--) {
+ if (base64_list[i].enabled && base64_list[i].min_len <= inlen) {
+ opt_impl = &base64_list[i];
+ break;
+ }
+ }
+
+ return opt_impl->decode(in, inlen, out, outlen);
+}
+
+double
+base64_test(bool generic, size_t niters, size_t len, size_t str_len)
+{
+ size_t cycles;
+ guchar *in, *out, *tmp;
+ gdouble t1, t2, total = 0;
+ gsize outlen;
+
+ g_assert(len > 0);
+ in = g_malloc(len);
+ tmp = g_malloc(len);
+ ottery_rand_bytes(in, len);
+
+ out = rspamd_encode_base64_fold(in, len, str_len, &outlen,
+ RSPAMD_TASK_NEWLINES_CRLF);
+
+ if (generic) {
+ base64_list[0].decode(out, outlen, tmp, &len);
+ }
+ else {
+ rspamd_cryptobox_base64_decode(out, outlen, tmp, &len);
+ }
+
+ g_assert(memcmp(in, tmp, len) == 0);
+
+ for (cycles = 0; cycles < niters; cycles++) {
+ t1 = rspamd_get_ticks(TRUE);
+ if (generic) {
+ base64_list[0].decode(out, outlen, tmp, &len);
+ }
+ else {
+ rspamd_cryptobox_base64_decode(out, outlen, tmp, &len);
+ }
+ t2 = rspamd_get_ticks(TRUE);
+ total += t2 - t1;
+ }
+
+ g_free(in);
+ g_free(tmp);
+ g_free(out);
+
+ return total;
+}
+
+
+gboolean
+rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen)
+{
+ const guchar *p, *end;
+
+ if (inlen == 0) {
+ return FALSE;
+ }
+
+ p = in;
+ end = in + inlen;
+
+ while (p < end && *p != '=') {
+ if (!g_ascii_isspace(*p)) {
+ if (base64_table_dec[*p] == 255) {
+ return FALSE;
+ }
+ }
+ p++;
+ }
+
+ return TRUE;
+} \ No newline at end of file
diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h
new file mode 100644
index 0000000..f53c80a
--- /dev/null
+++ b/src/libcryptobox/base64/base64.h
@@ -0,0 +1,31 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBCRYPTOBOX_BASE64_BASE64_H_
+#define SRC_LIBCRYPTOBOX_BASE64_BASE64_H_
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const char *base64_load(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ */
diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c
new file mode 100644
index 0000000..61df68e
--- /dev/null
+++ b/src/libcryptobox/base64/ref.c
@@ -0,0 +1,241 @@
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "libutil/util.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#define INNER_LOOP_64 \
+ do { \
+ uint64_t str, res, dec; \
+ bool aligned = rspamd_is_aligned_as(c, str); \
+ while (inlen >= 13) { \
+ if (aligned) { str = *(uint64_t *) c; } \
+ else { \
+ memcpy(&str, c, sizeof(str)); \
+ } \
+ str = GUINT64_TO_BE(str); \
+ if ((dec = base64_table_dec[str >> 56]) > 63) { \
+ break; \
+ } \
+ res = dec << 58; \
+ if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 52; \
+ if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 46; \
+ if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 40; \
+ if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 34; \
+ if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 28; \
+ if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 22; \
+ if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 16; \
+ res = GUINT64_FROM_BE(res); \
+ memcpy(o, &res, sizeof(res)); \
+ c += 8; \
+ o += 6; \
+ outl += 6; \
+ inlen -= 8; \
+ } \
+ } while (0)
+
+#define INNER_LOOP_32 \
+ do { \
+ uint32_t str, res, dec; \
+ bool aligned = rspamd_is_aligned_as(c, str); \
+ while (inlen >= 8) { \
+ if (aligned) { str = *(uint32_t *) c; } \
+ else { \
+ memcpy(&str, c, sizeof(str)); \
+ } \
+ str = GUINT32_TO_BE(str); \
+ if ((dec = base64_table_dec[str >> 24]) > 63) { \
+ break; \
+ } \
+ res = dec << 26; \
+ if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 20; \
+ if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 14; \
+ if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 8; \
+ res = GUINT32_FROM_BE(res); \
+ memcpy(o, &res, sizeof(res)); \
+ c += 4; \
+ o += 3; \
+ outl += 3; \
+ inlen -= 4; \
+ } \
+ } while (0)
+
+
+int base64_decode_ref(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
+{
+ ssize_t ret = 0;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
+ uint8_t q, carry;
+ size_t outl = 0;
+ size_t leftover = 0;
+
+repeat:
+ switch (leftover) {
+ for (;;) {
+ case 0:
+#if defined(__LP64__)
+ INNER_LOOP_64;
+#else
+ INNER_LOOP_32;
+#endif
+
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ carry = (uint8_t) (q << 2);
+ leftover++;
+
+ case 1:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ *o++ = carry | (q >> 4);
+ carry = (uint8_t) (q << 4);
+ leftover++;
+ outl++;
+
+ case 2:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ leftover++;
+
+ if (q == 254) {
+ if (inlen-- != 0) {
+ leftover = 0;
+ q = base64_table_dec[*c++];
+ ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+ break;
+ }
+ else {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ leftover--;
+ }
+ /* If we get here, there was an error: */
+ break;
+ }
+ *o++ = carry | (q >> 2);
+ carry = (uint8_t) (q << 6);
+ leftover++;
+ outl++;
+
+ case 3:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ /*
+ * When q == 254, the input char is '='. Return 1 and EOF.
+ * When q == 255, the input char is invalid. Return 0 and EOF.
+ */
+ if (q == 254 && inlen == 0) {
+ ret = 1;
+ leftover = 0;
+ }
+ else {
+ ret = 0;
+ }
+
+ break;
+ }
+
+ *o++ = carry | q;
+ carry = 0;
+ leftover = 0;
+ outl++;
+ }
+ }
+
+ if (!ret && inlen > 0) {
+ /* Skip to the next valid character in input */
+ while (inlen > 0 && base64_table_dec[*c] >= 254) {
+ c++;
+ inlen--;
+ }
+
+ if (inlen > 0) {
+ goto repeat;
+ }
+ }
+
+ *outlen = outl;
+
+ return ret;
+}
diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c
new file mode 100644
index 0000000..36070ab
--- /dev/null
+++ b/src/libcryptobox/base64/sse42.c
@@ -0,0 +1,268 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#endif
+#ifndef __SSE2__
+#define __SSE2__
+#endif
+#ifndef __SSE__
+#define __SSE__
+#endif
+#ifndef __SSE4_2__
+#define __SSE4_2__
+#endif
+#ifndef __SSE4_1__
+#define __SSE4_1__
+#endif
+#ifndef __SSSE3__
+#define __SSSE3__
+#endif
+#include <xmmintrin.h>
+#include <nmmintrin.h>
+
+
+static inline __m128i
+dec_reshuffle(__m128i in) __attribute__((__target__("sse4.2")));
+
+static inline __m128i dec_reshuffle(__m128i in)
+{
+ // Mask in a single byte per shift:
+ const __m128i maskB2 = _mm_set1_epi32(0x003F0000);
+ const __m128i maskB1 = _mm_set1_epi32(0x00003F00);
+
+ // Pack bytes together:
+ __m128i out = _mm_srli_epi32(in, 16);
+
+ out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2));
+
+ out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12));
+
+ out = _mm_or_si128(out, _mm_slli_epi32(in, 26));
+
+ // Reshuffle and repack into 12-byte output format:
+ return _mm_shuffle_epi8(out, _mm_setr_epi8(
+ 3, 2, 1,
+ 7, 6, 5,
+ 11, 10, 9,
+ 15, 14, 13,
+ -1, -1, -1, -1));
+}
+
+#define CMPGT(s, n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n))
+
+#define INNER_LOOP_SSE42 \
+ while (inlen >= 24) { \
+ __m128i str = _mm_loadu_si128((__m128i *) c); \
+ const __m128i lut = _mm_setr_epi8( \
+ 19, 16, 4, 4, \
+ 4, 4, 4, 4, \
+ 4, 4, 4, 4, \
+ 0, 0, -71, -65); \
+ const __m128i range = _mm_setr_epi8( \
+ '+', '+', \
+ '+', '+', \
+ '+', '+', \
+ '+', '+', \
+ '/', '/', \
+ '0', '9', \
+ 'A', 'Z', \
+ 'a', 'z'); \
+ if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \
+ seen_error = true; \
+ break; \
+ } \
+ __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
+ __m128i mask45 = CMPGT(str, 64); \
+ __m128i mask5 = CMPGT(str, 96); \
+ indices = _mm_andnot_si128(mask45, indices); \
+ mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \
+ indices = _mm_add_epi8(indices, mask45); \
+ indices = _mm_add_epi8(indices, mask5); \
+ __m128i delta = _mm_shuffle_epi8(lut, indices); \
+ str = _mm_add_epi8(str, delta); \
+ str = dec_reshuffle(str); \
+ _mm_storeu_si128((__m128i *) o, str); \
+ c += 16; \
+ o += 12; \
+ outl += 12; \
+ inlen -= 16; \
+ }
+
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
+{
+ ssize_t ret = 0;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
+ uint8_t q, carry;
+ size_t outl = 0;
+ size_t leftover = 0;
+ bool seen_error = false;
+
+repeat:
+ switch (leftover) {
+ for (;;) {
+ case 0:
+ if (G_LIKELY(!seen_error)) {
+ INNER_LOOP_SSE42
+ }
+
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ carry = q << 2;
+ leftover++;
+
+ case 1:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ *o++ = carry | (q >> 4);
+ carry = q << 4;
+ leftover++;
+ outl++;
+
+ case 2:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ leftover++;
+
+ if (q == 254) {
+ if (inlen-- != 0) {
+ leftover = 0;
+ q = base64_table_dec[*c++];
+ ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+ break;
+ }
+ else {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ leftover--;
+ }
+ /* If we get here, there was an error: */
+ break;
+ }
+ *o++ = carry | (q >> 2);
+ carry = q << 6;
+ leftover++;
+ outl++;
+
+ case 3:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ /*
+ * When q == 254, the input char is '='. Return 1 and EOF.
+ * When q == 255, the input char is invalid. Return 0 and EOF.
+ */
+ if (q == 254 && inlen == 0) {
+ ret = 1;
+ leftover = 0;
+ }
+ else {
+ ret = 0;
+ }
+
+ break;
+ }
+
+ *o++ = carry | q;
+ carry = 0;
+ leftover = 0;
+ outl++;
+ }
+ }
+
+ if (!ret && inlen > 0) {
+ /* Skip to the next valid character in input */
+ while (inlen > 0 && base64_table_dec[*c] >= 254) {
+ c++;
+ inlen--;
+ }
+
+ if (inlen > 0) {
+ seen_error = false;
+ goto repeat;
+ }
+ }
+
+ *outlen = outl;
+
+ return ret;
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC pop_options
+#endif
+#endif
diff --git a/src/libcryptobox/catena/LICENSE b/src/libcryptobox/catena/LICENSE
new file mode 100644
index 0000000..ff22dc8
--- /dev/null
+++ b/src/libcryptobox/catena/LICENSE
@@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2014 cforler
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/src/libcryptobox/catena/README.md b/src/libcryptobox/catena/README.md
new file mode 100644
index 0000000..4a0f948
--- /dev/null
+++ b/src/libcryptobox/catena/README.md
@@ -0,0 +1,18 @@
+Catena
+======
+Catena is a memory-consuming password scrambler that excellently
+thwarts massively parallel attacks on cheap memory-constrained
+hardware, such as recent graphics processing units (GPUs).
+Furthermore, Catena provides resistance against cache-timing attacks, since
+its memory-access pattern is password-independent.
+
+Academic paper:
+<a href="http://www.uni-weimar.de/fileadmin/user/fak/medien/professuren/Mediensicherheit/Research/Publications/catena-v3.1.pdf">catena-v3.1.pdf</a>
+
+Rspamd specific
+---------------
+
+Rspamd implements Catena-Butterfly using full blake2b hash implemented in the
+cryptobox.
+
+Original code: https://github.com/medsec/catena \ No newline at end of file
diff --git a/src/libcryptobox/catena/catena.c b/src/libcryptobox/catena/catena.c
new file mode 100644
index 0000000..7e066dd
--- /dev/null
+++ b/src/libcryptobox/catena/catena.c
@@ -0,0 +1,444 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ * Copyright (c) 2014 cforler
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "config.h"
+#include "catena.h"
+
+#include <sodium.h>
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define TO_LITTLE_ENDIAN_64(n) (n)
+#define TO_LITTLE_ENDIAN_32(n) (n)
+#else
+#define TO_LITTLE_ENDIAN_64 GUINT64_SWAP_LE_BE
+#define TO_LITTLE_ENDIAN_32 GUINT32_SWAP_LE_BE
+#endif
+
/* Recommended default values */
#define H_LEN CATENA_HLEN
#define KEY_LEN 16

/* Instantiation identifier; hashed into the initial state as H(V) in __Catena */
const uint8_t VERSION_ID[] = "Butterfly-Full";
/* Default depth: number of DBH passes per garlic level (see simple_catena) */
const uint8_t LAMBDA = 4;
/* Default memory cost: 2^GARLIC blocks of H_LEN bytes */
const uint8_t GARLIC = 16;
/* Default lower garlic bound passed to __Catena by simple_catena */
const uint8_t MIN_GARLIC = 16;
+
+/*
+ * Hash part
+ */
+
+static inline void
+__Hash1(const uint8_t *input, const uint32_t inputlen,
+ uint8_t hash[H_LEN])
+{
+ crypto_generichash_blake2b_state ctx;
+ crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN);
+ crypto_generichash_blake2b_update(&ctx, input, inputlen);
+ crypto_generichash_blake2b_final(&ctx, hash, H_LEN);
+}
+
+/***************************************************/
+
+static inline void __Hash2(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
+ const uint8_t i2len, uint8_t hash[H_LEN])
+{
+ crypto_generichash_blake2b_state ctx;
+
+ crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN);
+ crypto_generichash_blake2b_update(&ctx, i1, i1len);
+ crypto_generichash_blake2b_update(&ctx, i2, i2len);
+ crypto_generichash_blake2b_final(&ctx, hash, H_LEN);
+}
+
+/***************************************************/
+
+static inline void __Hash3(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
+ const uint8_t i2len, const uint8_t *i3, const uint8_t i3len,
+ uint8_t hash[H_LEN])
+{
+ crypto_generichash_blake2b_state ctx;
+
+ crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN);
+ crypto_generichash_blake2b_update(&ctx, i1, i1len);
+ crypto_generichash_blake2b_update(&ctx, i2, i2len);
+ crypto_generichash_blake2b_update(&ctx, i3, i3len);
+ crypto_generichash_blake2b_final(&ctx, hash, H_LEN);
+}
+
+/***************************************************/
+
+static inline void __Hash4(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
+ const uint8_t i2len, const uint8_t *i3, const uint8_t i3len,
+ const uint8_t *i4, const uint8_t i4len, uint8_t hash[H_LEN])
+{
+ crypto_generichash_blake2b_state ctx;
+
+ crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN);
+ crypto_generichash_blake2b_update(&ctx, i1, i1len);
+ crypto_generichash_blake2b_update(&ctx, i2, i2len);
+ crypto_generichash_blake2b_update(&ctx, i3, i3len);
+ crypto_generichash_blake2b_update(&ctx, i4, i4len);
+ crypto_generichash_blake2b_final(&ctx, hash, H_LEN);
+}
+
+/***************************************************/
+
+static inline void __Hash5(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2,
+ const uint8_t i2len, const uint8_t *i3, const uint8_t i3len,
+ const uint8_t *i4, const uint8_t i4len, const uint8_t *i5,
+ const uint8_t i5len, uint8_t hash[H_LEN])
+{
+ crypto_generichash_blake2b_state ctx;
+
+ crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN);
+ crypto_generichash_blake2b_update(&ctx, i1, i1len);
+ crypto_generichash_blake2b_update(&ctx, i2, i2len);
+ crypto_generichash_blake2b_update(&ctx, i3, i3len);
+ crypto_generichash_blake2b_update(&ctx, i4, i4len);
+ crypto_generichash_blake2b_update(&ctx, i5, i5len);
+ crypto_generichash_blake2b_final(&ctx, hash, H_LEN);
+}
+
+static inline void
+__HashFast(int vindex, const uint8_t *i1, const uint8_t *i2,
+ uint8_t hash[H_LEN])
+{
+ __Hash2(i1, H_LEN, i2, H_LEN, hash);
+}
+
/*
 * Intentionally a no-op: this build keeps no extra per-round hash state to
 * reset. Callers invoke it at the points where a stateful (reduced-hash)
 * variant would reinitialise its state — NOTE(review): presumed API parity,
 * confirm against upstream catena.
 */
static void __ResetState(void)
{
}
+
/*
 * Misc utils
 */
/* All-zero H_LEN block; not referenced in this file (non-static, so
 * presumably consumed elsewhere in libcryptobox — verify before removing) */
const uint8_t ZERO8[H_LEN] = {0};
+
/* see: http://en.wikipedia.org/wiki/Xorshift#Variations */
static int p;          /* current index into the 16-word state ring */
static uint64_t s[16]; /* 1024 bits of xorshift1024* state */

/*
 * Seed the xorshift1024* generator from two 64-byte buffers: state words
 * 0..7 are packed little-endian from `a`, words 8..15 from `b`.
 */
static void
initXSState(const uint8_t *a, const uint8_t *b)
{
	p = 0;

	for (int w = 0; w < 8; w++) {
		uint64_t lo = 0;
		uint64_t hi = 0;

		/* Assemble each 64-bit word from its 8 bytes, LSB first */
		for (int byte = 7; byte >= 0; byte--) {
			lo = (lo << 8) | (uint64_t) a[w * 8 + byte];
			hi = (hi << 8) | (uint64_t) b[w * 8 + byte];
		}

		s[w] = lo;
		s[w + 8] = hi;
	}
}

/*
 * One step of the xorshift1024* generator (Vigna's variant with
 * multiplier 1181783497276652981).
 */
static uint64_t
xorshift1024star(void)
{
	const uint64_t a = s[p];
	uint64_t b = s[p = (p + 1) & 15];

	b ^= b << 31;
	b ^= b >> 11;
	s[p] = (a ^ (a >> 30)) ^ b;

	return s[p] * UINT64_C(1181783497276652981);
}
+
+static void
+H_INIT(const uint8_t *x, const uint16_t xlen, uint8_t *vm1, uint8_t *vm2)
+{
+ const uint8_t l = 2;
+ uint8_t *tmp = (uint8_t *) g_malloc(l * H_LEN);
+
+ for (uint8_t i = 0; i != l; ++i) {
+ __Hash2(&i, 1, x, xlen, tmp + i * H_LEN);
+ }
+
+ memcpy(vm1, tmp, H_LEN);
+ memcpy(vm2, tmp + (l / 2 * H_LEN), H_LEN);
+ g_free(tmp);
+}
+
+static void
+H_First(const uint8_t *i1, const uint8_t *i2, uint8_t *hash)
+{
+ uint8_t i = 0;
+ uint8_t *x = (uint8_t *) g_malloc(H_LEN);
+
+ __ResetState();
+ __Hash2(i1, H_LEN, i2, H_LEN, x);
+ __Hash2(&i, 1, x, H_LEN, hash);
+ g_free(x);
+}
+
+static inline void
+initmem(const uint8_t x[H_LEN], const uint64_t c, uint8_t *r)
+{
+ uint8_t *vm2 = (uint8_t *) g_malloc(H_LEN);
+ uint8_t *vm1 = (uint8_t *) g_malloc(H_LEN);
+
+ H_INIT(x, H_LEN, vm1, vm2);
+ __ResetState();
+ __HashFast(0, vm1, vm2, r);
+ __HashFast(1, r, vm1, r + H_LEN);
+
+ /* Top row */
+ for (uint64_t i = 2; i < c; i++) {
+ __HashFast(i, r + (i - 1) * H_LEN, r + (i - 2) * H_LEN, r + i * H_LEN);
+ }
+
+ g_free(vm2);
+ g_free(vm1);
+}
+
+static inline void
+catena_gamma(const uint8_t garlic, const uint8_t *salt,
+ const uint8_t saltlen, uint8_t *r)
+{
+ const uint64_t q = UINT64_C(1) << ((3 * garlic + 3) / 4);
+
+ uint64_t i, j, j2;
+ uint8_t *tmp = g_malloc(H_LEN);
+ uint8_t *tmp2 = g_malloc(H_LEN);
+
+ __Hash1(salt, saltlen, tmp);
+ __Hash1(tmp, H_LEN, tmp2);
+ initXSState(tmp, tmp2);
+
+ __ResetState();
+ for (i = 0; i < q; i++) {
+ j = xorshift1024star() >> (64 - garlic);
+ j2 = xorshift1024star() >> (64 - garlic);
+ __HashFast(i, r + j * H_LEN, r + j2 * H_LEN, r + j * H_LEN);
+ }
+
+ g_free(tmp);
+ g_free(tmp2);
+}
+
+static void
+XOR(const uint8_t *input1, const uint8_t *input2, uint8_t *output)
+{
+ uint32_t i;
+
+ for (i = 0; i < H_LEN; i++) {
+ output[i] = input1[i] ^ input2[i];
+ }
+}
+
+/*
+ * Butterfly part
+ */
+/*
+ * Sigma function that defines the diagonal connections of a DBG
+ * diagonal front: flip the (g-i)th bit (Inverse Butterfly Graph)
+ * diagonal back: flip the i-(g-1)th bit (Regular Butterfly Graph)
+ */
/*
 * Sigma function defining the diagonal connections of the double
 * butterfly graph: for rows i < g flip bit (g-1-i) of j (inverse
 * butterfly, "diagonal front"); for rows i >= g flip bit (i-(g-1))
 * (regular butterfly, "diagonal back").
 */
static uint64_t
sigma(const uint8_t g, const uint64_t i, const uint64_t j)
{
	const uint64_t bit = (i < g) ? (uint64_t) (g - 1) - i : i - (uint64_t) (g - 1);

	return j ^ (UINT64_C(1) << bit);
}
+
+/*calculate actual index from level and element index*/
/*
 * Map (level i, element j) to a physical block index inside the
 * 1.5 * 2^g block window: the three logical rows rotate through the
 * array depending on (i + co) mod 3, where `co` carries the rotation
 * over from the previous DBH pass.
 */
static uint64_t
idx(uint64_t i, uint64_t j, uint8_t co, uint64_t c, uint64_t m)
{
	switch ((i + co) % 3) {
	case 0:
		/* row lives at the start of the array */
		return j;
	case 1:
		/* row starts at offset c; the tail wraps to the beginning */
		return (j < m) ? (j + c) : (j - m);
	default:
		/* (i + co) % 3 == 2: row starts at offset m */
		return j + m;
	}
}
+
+/*
+ * Computes the hash of x using a Double Butterfly Graph,
+ * which forms a (2^g, \lambda)-superconcentrator
+ */
/*
 * One memory-hard pass over 2^garlic blocks of H_LEN bytes:
 * fill the top row (initmem), apply the salt-dependent gamma shuffle,
 * then run `lambda` double-butterfly hashing (DBH) passes and copy the
 * final block into h. x and h may alias (callers pass the same buffer).
 * Only c + m = 1.5 * 2^garlic blocks are allocated; idx() rotates the
 * 2*garlic logical rows through this window.
 */
static void
Flap(const uint8_t x[H_LEN], const uint8_t lambda, const uint8_t garlic,
	 const uint8_t *salt, const uint8_t saltlen, uint8_t h[H_LEN])
{
	const uint64_t c = UINT64_C(1) << garlic;
	const uint64_t m = UINT64_C(1) << (garlic - 1); /* 0.5 * 2^g */
	const uint32_t l = 2 * garlic;                  /* rows per DBH pass */

	uint8_t *r = g_malloc((c + m) * H_LEN); /* 1.5 * 2^g blocks of memory */
	uint8_t *tmp = g_malloc(H_LEN);
	uint64_t i, j;
	uint8_t k;
	uint8_t co = 0; /* carry over from last iteration */

	/* Top row */
	initmem(x, c, r);

	/*Gamma Function*/
	catena_gamma(garlic, salt, saltlen, r);

	/* DBH */
	for (k = 0; k < lambda; k++) {
		for (i = 1; i < l; i++) {
			/* First vertex of row i: combine last and first block of row i-1 */
			XOR(r + idx(i - 1, c - 1, co, c, m) * H_LEN,
				r + idx(i - 1, 0, co, c, m) * H_LEN, tmp);

			/*
			 * r0 := H(tmp || vsigma(g,i-1,0) )
			 * __Hash2(tmp, H_LEN, r+idx(i-1,sigma(garlic,i-1,0),co,c,m) * H_LEN, H_LEN,
			 * r+idx(i,0,co,c,m) *H_LEN);
			 */
			H_First(tmp,
					r + idx(i - 1, sigma(garlic, i - 1, 0), co, c, m) * H_LEN,
					r + idx(i, 0, co, c, m) * H_LEN);
			__ResetState();

			/* vertices */
			for (j = 1; j < c; j++) {
				/* tmp:= rj-1 XOR vj */
				XOR(r + idx(i, j - 1, co, c, m) * H_LEN,
					r + idx(i - 1, j, co, c, m) * H_LEN, tmp);
				/* rj := H(tmp || vsigma(g,i-1,j)) */
				__HashFast(j, tmp,
						   r + idx(i - 1, sigma(garlic, i - 1, j), co, c, m) * H_LEN,
						   r + idx(i, j, co, c, m) * H_LEN);
			}
		}
		/* Here i == l: advance the row-window rotation for the next pass */
		co = (co + (i - 1)) % 3;
	}

	/* Result is the last block of the final row */
	memcpy(h, r + idx(0, c - 1, co, c, m) * H_LEN, H_LEN);
	g_free(r);
	g_free(tmp);
}
+
/*
 * Core Catena KDF: derives `hashlen` bytes into `hash` from pwd/salt/data
 * with depth `lambda` and memory cost growing from 2^min_garlic to
 * 2^garlic blocks. In CLIENT mode the full H_LEN state of the last level
 * is returned instead of the truncated hash (server relief protocol).
 * Returns 0 on success, -1 on invalid parameters.
 */
static int
__Catena(const uint8_t *pwd, const uint32_t pwdlen,
		 const uint8_t *salt, const uint8_t saltlen, const uint8_t *data,
		 const uint32_t datalen, const uint8_t lambda, const uint8_t min_garlic,
		 const uint8_t garlic, const uint8_t hashlen, const uint8_t client,
		 const uint8_t tweak_id, uint8_t *hash)
{
	uint8_t x[H_LEN];  /* running state, repeatedly overwritten in place */
	uint8_t hv[H_LEN]; /* H(VERSION_ID) */
	uint8_t t[4];      /* tweak: mode, lambda, output length, salt length */
	uint8_t c;

	/* Parameter sanity: garlic > 63 would overflow the 1 << garlic shifts */
	if ((hashlen > H_LEN) || (garlic > 63) || (min_garlic > garlic) || (lambda == 0) || (min_garlic == 0)) {
		return -1;
	}

	/*Compute H(V)*/
	__Hash1(VERSION_ID, strlen((char *) VERSION_ID), hv);

	/* Compute Tweak */
	t[0] = tweak_id;
	t[1] = lambda;
	t[2] = hashlen;
	t[3] = saltlen;

	/* Compute H(AD) */
	__Hash1((uint8_t *) data, datalen, x);

	/* Compute the initial value to hash */
	__Hash5(hv, H_LEN, t, 4, x, H_LEN, pwd, pwdlen, salt, saltlen, x);

	/*Overwrite Password if enabled*/
#ifdef OVERWRITE
	erasepwd(pwd, pwdlen);
#endif

	/* Cheap warm-up flap at roughly half the minimal garlic */
	Flap(x, lambda, (min_garlic + 1) / 2, salt, saltlen, x);

	for (c = min_garlic; c <= garlic; c++) {
		Flap(x, lambda, c, salt, saltlen, x);
		if ((c == garlic) && (client == CLIENT)) {
			/* Client side of server relief: hand back the full state */
			memcpy(hash, x, H_LEN);
			return 0;
		}
		/* Chain levels and truncate the state to hashlen bytes */
		__Hash2(&c, 1, x, H_LEN, x);
		memset(x + hashlen, 0, H_LEN - hashlen);
	}

	memcpy(hash, x, hashlen);

	return 0;
}
+
+/***************************************************/
+
/*
 * Public Catena entry point: plain forwarder to __Catena in REGULAR
 * (server) mode with the password-hashing tweak id.
 * Returns 0 on success, -1 on invalid parameters.
 */
int catena(const uint8_t *pwd, const uint32_t pwdlen, const uint8_t *salt,
		   const uint8_t saltlen, const uint8_t *data, const uint32_t datalen,
		   const uint8_t lambda, const uint8_t min_garlic, const uint8_t garlic,
		   const uint8_t hashlen, uint8_t *hash)
{
	return __Catena(pwd, pwdlen, salt, saltlen, data, datalen, lambda,
					min_garlic, garlic, hashlen, REGULAR, PASSWORD_HASHING_MODE, hash);
}
+
/*
 * Convenience wrapper: Catena with the recommended defaults
 * (LAMBDA = 4, GARLIC = MIN_GARLIC = 16) and a full H_LEN output.
 * Returns 0 on success, -1 on invalid parameters.
 */
int simple_catena(const uint8_t *pwd, const uint32_t pwdlen,
				  const uint8_t *salt, const uint8_t saltlen,
				  const uint8_t *data, const uint32_t datalen,
				  uint8_t hash[H_LEN])
{
	return __Catena(pwd, pwdlen, salt, saltlen, data, datalen,
					LAMBDA, MIN_GARLIC, GARLIC, H_LEN,
					REGULAR, PASSWORD_HASHING_MODE, hash);
}
+
+int catena_test(void)
+{
+ /* From catena-v3.1 spec */
+ guint8 pw[] = {0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64};
+ guint8 salt[] = {0x73, 0x61, 0x6c, 0x74};
+ guint8 ad[] = {0x64, 0x61, 0x74, 0x61};
+ guint8 expected[] = {
+ 0x20, 0xc5, 0x91, 0x93, 0x8f, 0xc3, 0xaf, 0xcc, 0x3b, 0xba, 0x91, 0xd2, 0xfb,
+ 0x84, 0xbf, 0x7b, 0x44, 0x04, 0xf9, 0x4c, 0x45, 0xed, 0x4d, 0x11, 0xa7, 0xe2,
+ 0xb4, 0x12, 0x3e, 0xab, 0x0b, 0x77, 0x4a, 0x12, 0xb4, 0x22, 0xd0, 0xda, 0xb5,
+ 0x25, 0x29, 0x02, 0xfc, 0x54, 0x47, 0xea, 0x82, 0x63, 0x8c, 0x1a, 0xfb, 0xa7,
+ 0xa9, 0x94, 0x24, 0x13, 0x0e, 0x44, 0x36, 0x3b, 0x9d, 0x9f, 0xc9, 0x60};
+ guint8 real[H_LEN];
+
+ if (catena(pw, sizeof(pw), salt, sizeof(salt), ad, sizeof(ad),
+ 4, 10, 10, H_LEN, real) != 0) {
+ return -1;
+ }
+
+ return memcmp(real, expected, H_LEN);
+}
diff --git a/src/libcryptobox/catena/catena.h b/src/libcryptobox/catena/catena.h
new file mode 100644
index 0000000..1fcea21
--- /dev/null
+++ b/src/libcryptobox/catena/catena.h
@@ -0,0 +1,62 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef SRC_LIBCRYPTOBOX_CATENA_CATENA_H_
#define SRC_LIBCRYPTOBOX_CATENA_CATENA_H_

/*
 * Fix: the prototypes below use uint8_t/uint32_t, so the header must be
 * self-contained and pull in <stdint.h> instead of relying on whatever
 * the including file happened to include first.
 */
#include <stdint.h>

/* Modes */
#define PASSWORD_HASHING_MODE 0
#define KEY_DERIVATION_MODE 1
#define REGULAR 0
#define CLIENT 1

/* Output length of the underlying hash, bytes */
#define CATENA_HLEN 64

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Full Catena PBKDF interface
 * @param pwd password
 * @param pwdlen length of password
 * @param salt salt
 * @param saltlen length of salt
 * @param data additional data
 * @param datalen length of additional data
 * @param lambda depth (number of passes per garlic level)
 * @param min_garlic starting memory cost (2^min_garlic blocks)
 * @param garlic final memory cost (2^garlic blocks)
 * @param hashlen desired output length, at most CATENA_HLEN
 * @param hash output buffer of at least hashlen bytes
 * @return 0 if hash is generated, -1 in case of error
 */
int catena(const uint8_t *pwd, const uint32_t pwdlen,
		   const uint8_t *salt, const uint8_t saltlen,
		   const uint8_t *data, const uint32_t datalen,
		   const uint8_t lambda, const uint8_t min_garlic,
		   const uint8_t garlic, const uint8_t hashlen, uint8_t *hash);

/**
 * Simple interface for catena PBKDF
 * @param pwd password
 * @param pwdlen length of password
 * @param salt salt
 * @param saltlen length of salt
 * @param data additional data
 * @param datalen length of additional data
 * @param hash output hash
 * @return 0 if hash is generated, -1 in case of error
 */
int simple_catena(const uint8_t *pwd, const uint32_t pwdlen,
				  const uint8_t *salt, const uint8_t saltlen,
				  const uint8_t *data, const uint32_t datalen,
				  uint8_t hash[CATENA_HLEN]);

/**
 * Run a quick test on catena implementation
 */
int catena_test(void);

#ifdef __cplusplus
}
#endif

#endif /* SRC_LIBCRYPTOBOX_CATENA_CATENA_H_ */
diff --git a/src/libcryptobox/chacha20/avx.S b/src/libcryptobox/chacha20/avx.S
new file mode 100644
index 0000000..7689b84
--- /dev/null
+++ b/src/libcryptobox/chacha20/avx.S
@@ -0,0 +1,614 @@
+#include "../macro.S"
+#include "constants.S"
+SECTION_TEXT
+
/*
 * chacha_blocks_avx(state, in, out, len)
 *   rdi = chacha state: 16-byte constants block at 0(%rsp) copy, key words
 *         at 0/16(%rdi), counter+iv at 32(%rdi), round count at 48(%rdi)
 *   rsi = input (may be NULL: produce raw keystream instead of XORing)
 *   rdx = output
 *   rcx = length in bytes
 * Processes 256 bytes (4 interleaved blocks) per iteration of the wide
 * path, then falls back to 64-byte blocks; partial blocks are staged
 * through an aligned stack buffer. The updated counter is stored back to
 * 32(%rdi) before returning.
 */
GLOBAL_HIDDEN_FN chacha_blocks_avx
chacha_blocks_avx_local:
pushq %rbx
pushq %rbp
movq %rsp, %rbp
andq $~63, %rsp
subq $512, %rsp
/* Load constants, key, counter/iv and round count into the stack frame */
LOAD_VAR_PIC chacha_constants, %rax
vmovdqa 0(%rax), %xmm8
vmovdqa 16(%rax), %xmm6
vmovdqa 32(%rax), %xmm7
vmovdqu 0(%rdi), %xmm9
vmovdqu 16(%rdi), %xmm10
vmovdqu 32(%rdi), %xmm11
movq 48(%rdi), %rax
movq $1, %r9
vmovdqa %xmm8, 0(%rsp)
vmovdqa %xmm9, 16(%rsp)
vmovdqa %xmm10, 32(%rsp)
vmovdqa %xmm11, 48(%rsp)
vmovdqa %xmm6, 80(%rsp)
vmovdqa %xmm7, 96(%rsp)
movq %rax, 64(%rsp)
cmpq $256, %rcx
jb chacha_blocks_avx_below256
/* Wide path: broadcast each state word across a register (4 lanes = 4 blocks) */
vpshufd $0x00, %xmm8, %xmm0
vpshufd $0x55, %xmm8, %xmm1
vpshufd $0xaa, %xmm8, %xmm2
vpshufd $0xff, %xmm8, %xmm3
vmovdqa %xmm0, 128(%rsp)
vmovdqa %xmm1, 144(%rsp)
vmovdqa %xmm2, 160(%rsp)
vmovdqa %xmm3, 176(%rsp)
vpshufd $0x00, %xmm9, %xmm0
vpshufd $0x55, %xmm9, %xmm1
vpshufd $0xaa, %xmm9, %xmm2
vpshufd $0xff, %xmm9, %xmm3
vmovdqa %xmm0, 192(%rsp)
vmovdqa %xmm1, 208(%rsp)
vmovdqa %xmm2, 224(%rsp)
vmovdqa %xmm3, 240(%rsp)
vpshufd $0x00, %xmm10, %xmm0
vpshufd $0x55, %xmm10, %xmm1
vpshufd $0xaa, %xmm10, %xmm2
vpshufd $0xff, %xmm10, %xmm3
vmovdqa %xmm0, 256(%rsp)
vmovdqa %xmm1, 272(%rsp)
vmovdqa %xmm2, 288(%rsp)
vmovdqa %xmm3, 304(%rsp)
vpshufd $0xaa, %xmm11, %xmm0
vpshufd $0xff, %xmm11, %xmm1
vmovdqa %xmm0, 352(%rsp)
vmovdqa %xmm1, 368(%rsp)
jmp chacha_blocks_avx_atleast256
.p2align 6,,63
nop
nop
nop
nop
nop
chacha_blocks_avx_atleast256:
/* Build the four per-lane counters (counter, counter+1, ..., +3) */
movq 48(%rsp), %rax
leaq 1(%rax), %r8
leaq 2(%rax), %r9
leaq 3(%rax), %r10
leaq 4(%rax), %rbx
movl %eax, 320(%rsp)
movl %r8d, 4+320(%rsp)
movl %r9d, 8+320(%rsp)
movl %r10d, 12+320(%rsp)
shrq $32, %rax
shrq $32, %r8
shrq $32, %r9
shrq $32, %r10
movl %eax, 336(%rsp)
movl %r8d, 4+336(%rsp)
movl %r9d, 8+336(%rsp)
movl %r10d, 12+336(%rsp)
movq %rbx, 48(%rsp)
movq 64(%rsp), %rax
vmovdqa 128(%rsp), %xmm0
vmovdqa 144(%rsp), %xmm1
vmovdqa 160(%rsp), %xmm2
vmovdqa 176(%rsp), %xmm3
vmovdqa 192(%rsp), %xmm4
vmovdqa 208(%rsp), %xmm5
vmovdqa 224(%rsp), %xmm6
vmovdqa 240(%rsp), %xmm7
vmovdqa 256(%rsp), %xmm8
vmovdqa 272(%rsp), %xmm9
vmovdqa 288(%rsp), %xmm10
vmovdqa 304(%rsp), %xmm11
vmovdqa 320(%rsp), %xmm12
vmovdqa 336(%rsp), %xmm13
vmovdqa 352(%rsp), %xmm14
vmovdqa 368(%rsp), %xmm15
chacha_blocks_avx_mainloop1:
/* One double round (column + diagonal) across four interleaved blocks;
 * 112(%rsp) spills one register, rax counts down rounds by 2 */
vpaddd %xmm0, %xmm4, %xmm0
vpaddd %xmm1, %xmm5, %xmm1
vpxor %xmm12, %xmm0, %xmm12
vpxor %xmm13, %xmm1, %xmm13
vpaddd %xmm2, %xmm6, %xmm2
vpaddd %xmm3, %xmm7, %xmm3
vpxor %xmm14, %xmm2, %xmm14
vpxor %xmm15, %xmm3, %xmm15
vpshufb 80(%rsp), %xmm12, %xmm12
vpshufb 80(%rsp), %xmm13, %xmm13
vpaddd %xmm8, %xmm12, %xmm8
vpaddd %xmm9, %xmm13, %xmm9
vpshufb 80(%rsp), %xmm14, %xmm14
vpshufb 80(%rsp), %xmm15, %xmm15
vpaddd %xmm10, %xmm14, %xmm10
vpaddd %xmm11, %xmm15, %xmm11
vmovdqa %xmm12, 112(%rsp)
vpxor %xmm4, %xmm8, %xmm4
vpxor %xmm5, %xmm9, %xmm5
vpslld $ 12, %xmm4, %xmm12
vpsrld $20, %xmm4, %xmm4
vpxor %xmm4, %xmm12, %xmm4
vpslld $ 12, %xmm5, %xmm12
vpsrld $20, %xmm5, %xmm5
vpxor %xmm5, %xmm12, %xmm5
vpxor %xmm6, %xmm10, %xmm6
vpxor %xmm7, %xmm11, %xmm7
vpslld $ 12, %xmm6, %xmm12
vpsrld $20, %xmm6, %xmm6
vpxor %xmm6, %xmm12, %xmm6
vpslld $ 12, %xmm7, %xmm12
vpsrld $20, %xmm7, %xmm7
vpxor %xmm7, %xmm12, %xmm7
vpaddd %xmm0, %xmm4, %xmm0
vpaddd %xmm1, %xmm5, %xmm1
vpxor 112(%rsp), %xmm0, %xmm12
vpxor %xmm13, %xmm1, %xmm13
vpaddd %xmm2, %xmm6, %xmm2
vpaddd %xmm3, %xmm7, %xmm3
vpxor %xmm14, %xmm2, %xmm14
vpxor %xmm15, %xmm3, %xmm15
vpshufb 96(%rsp), %xmm12, %xmm12
vpshufb 96(%rsp), %xmm13, %xmm13
vpaddd %xmm8, %xmm12, %xmm8
vpaddd %xmm9, %xmm13, %xmm9
vpshufb 96(%rsp), %xmm14, %xmm14
vpshufb 96(%rsp), %xmm15, %xmm15
vpaddd %xmm10, %xmm14, %xmm10
vpaddd %xmm11, %xmm15, %xmm11
vmovdqa %xmm12, 112(%rsp)
vpxor %xmm4, %xmm8, %xmm4
vpxor %xmm5, %xmm9, %xmm5
vpslld $ 7, %xmm4, %xmm12
vpsrld $25, %xmm4, %xmm4
vpxor %xmm4, %xmm12, %xmm4
vpslld $ 7, %xmm5, %xmm12
vpsrld $25, %xmm5, %xmm5
vpxor %xmm5, %xmm12, %xmm5
vpxor %xmm6, %xmm10, %xmm6
vpxor %xmm7, %xmm11, %xmm7
vpslld $ 7, %xmm6, %xmm12
vpsrld $25, %xmm6, %xmm6
vpxor %xmm6, %xmm12, %xmm6
vpslld $ 7, %xmm7, %xmm12
vpsrld $25, %xmm7, %xmm7
vpxor %xmm7, %xmm12, %xmm7
vpaddd %xmm0, %xmm5, %xmm0
vpaddd %xmm1, %xmm6, %xmm1
vpxor %xmm15, %xmm0, %xmm15
vpxor 112(%rsp), %xmm1, %xmm12
vpaddd %xmm2, %xmm7, %xmm2
vpaddd %xmm3, %xmm4, %xmm3
vpxor %xmm13, %xmm2, %xmm13
vpxor %xmm14, %xmm3, %xmm14
vpshufb 80(%rsp), %xmm15, %xmm15
vpshufb 80(%rsp), %xmm12, %xmm12
vpaddd %xmm10, %xmm15, %xmm10
vpaddd %xmm11, %xmm12, %xmm11
vpshufb 80(%rsp), %xmm13, %xmm13
vpshufb 80(%rsp), %xmm14, %xmm14
vpaddd %xmm8, %xmm13, %xmm8
vpaddd %xmm9, %xmm14, %xmm9
vmovdqa %xmm15, 112(%rsp)
vpxor %xmm5, %xmm10, %xmm5
vpxor %xmm6, %xmm11, %xmm6
vpslld $ 12, %xmm5, %xmm15
vpsrld $20, %xmm5, %xmm5
vpxor %xmm5, %xmm15, %xmm5
vpslld $ 12, %xmm6, %xmm15
vpsrld $20, %xmm6, %xmm6
vpxor %xmm6, %xmm15, %xmm6
vpxor %xmm7, %xmm8, %xmm7
vpxor %xmm4, %xmm9, %xmm4
vpslld $ 12, %xmm7, %xmm15
vpsrld $20, %xmm7, %xmm7
vpxor %xmm7, %xmm15, %xmm7
vpslld $ 12, %xmm4, %xmm15
vpsrld $20, %xmm4, %xmm4
vpxor %xmm4, %xmm15, %xmm4
vpaddd %xmm0, %xmm5, %xmm0
vpaddd %xmm1, %xmm6, %xmm1
vpxor 112(%rsp), %xmm0, %xmm15
vpxor %xmm12, %xmm1, %xmm12
vpaddd %xmm2, %xmm7, %xmm2
vpaddd %xmm3, %xmm4, %xmm3
vpxor %xmm13, %xmm2, %xmm13
vpxor %xmm14, %xmm3, %xmm14
vpshufb 96(%rsp), %xmm15, %xmm15
vpshufb 96(%rsp), %xmm12, %xmm12
vpaddd %xmm10, %xmm15, %xmm10
vpaddd %xmm11, %xmm12, %xmm11
vpshufb 96(%rsp), %xmm13, %xmm13
vpshufb 96(%rsp), %xmm14, %xmm14
vpaddd %xmm8, %xmm13, %xmm8
vpaddd %xmm9, %xmm14, %xmm9
vmovdqa %xmm15, 112(%rsp)
vpxor %xmm5, %xmm10, %xmm5
vpxor %xmm6, %xmm11, %xmm6
vpslld $ 7, %xmm5, %xmm15
vpsrld $25, %xmm5, %xmm5
vpxor %xmm5, %xmm15, %xmm5
vpslld $ 7, %xmm6, %xmm15
vpsrld $25, %xmm6, %xmm6
vpxor %xmm6, %xmm15, %xmm6
vpxor %xmm7, %xmm8, %xmm7
vpxor %xmm4, %xmm9, %xmm4
vpslld $ 7, %xmm7, %xmm15
vpsrld $25, %xmm7, %xmm7
vpxor %xmm7, %xmm15, %xmm7
vpslld $ 7, %xmm4, %xmm15
vpsrld $25, %xmm4, %xmm4
vpxor %xmm4, %xmm15, %xmm4
vmovdqa 112(%rsp), %xmm15
subq $2, %rax
jnz chacha_blocks_avx_mainloop1
/* Add the input state back, then transpose lanes into sequential blocks */
vpaddd 128(%rsp), %xmm0, %xmm0
vpaddd 144(%rsp), %xmm1, %xmm1
vpaddd 160(%rsp), %xmm2, %xmm2
vpaddd 176(%rsp), %xmm3, %xmm3
vpaddd 192(%rsp), %xmm4, %xmm4
vpaddd 208(%rsp), %xmm5, %xmm5
vpaddd 224(%rsp), %xmm6, %xmm6
vpaddd 240(%rsp), %xmm7, %xmm7
vpaddd 256(%rsp), %xmm8, %xmm8
vpaddd 272(%rsp), %xmm9, %xmm9
vpaddd 288(%rsp), %xmm10, %xmm10
vpaddd 304(%rsp), %xmm11, %xmm11
vpaddd 320(%rsp), %xmm12, %xmm12
vpaddd 336(%rsp), %xmm13, %xmm13
vpaddd 352(%rsp), %xmm14, %xmm14
vpaddd 368(%rsp), %xmm15, %xmm15
vmovdqa %xmm8, 384(%rsp)
vmovdqa %xmm9, 400(%rsp)
vmovdqa %xmm10, 416(%rsp)
vmovdqa %xmm11, 432(%rsp)
vmovdqa %xmm12, 448(%rsp)
vmovdqa %xmm13, 464(%rsp)
vmovdqa %xmm14, 480(%rsp)
vmovdqa %xmm15, 496(%rsp)
vpunpckldq %xmm1, %xmm0, %xmm8
vpunpckldq %xmm3, %xmm2, %xmm9
vpunpckhdq %xmm1, %xmm0, %xmm12
vpunpckhdq %xmm3, %xmm2, %xmm13
vpunpckldq %xmm5, %xmm4, %xmm10
vpunpckldq %xmm7, %xmm6, %xmm11
vpunpckhdq %xmm5, %xmm4, %xmm14
vpunpckhdq %xmm7, %xmm6, %xmm15
vpunpcklqdq %xmm9, %xmm8, %xmm0
vpunpcklqdq %xmm11, %xmm10, %xmm1
vpunpckhqdq %xmm9, %xmm8, %xmm2
vpunpckhqdq %xmm11, %xmm10, %xmm3
vpunpcklqdq %xmm13, %xmm12, %xmm4
vpunpcklqdq %xmm15, %xmm14, %xmm5
vpunpckhqdq %xmm13, %xmm12, %xmm6
vpunpckhqdq %xmm15, %xmm14, %xmm7
/* rsi == 0 means keystream-only (no plaintext to XOR) */
andq %rsi, %rsi
jz chacha_blocks_avx_noinput1
vpxor 0(%rsi), %xmm0, %xmm0
vpxor 16(%rsi), %xmm1, %xmm1
vpxor 64(%rsi), %xmm2, %xmm2
vpxor 80(%rsi), %xmm3, %xmm3
vpxor 128(%rsi), %xmm4, %xmm4
vpxor 144(%rsi), %xmm5, %xmm5
vpxor 192(%rsi), %xmm6, %xmm6
vpxor 208(%rsi), %xmm7, %xmm7
vmovdqu %xmm0, 0(%rdx)
vmovdqu %xmm1, 16(%rdx)
vmovdqu %xmm2, 64(%rdx)
vmovdqu %xmm3, 80(%rdx)
vmovdqu %xmm4, 128(%rdx)
vmovdqu %xmm5, 144(%rdx)
vmovdqu %xmm6, 192(%rdx)
vmovdqu %xmm7, 208(%rdx)
vmovdqa 384(%rsp), %xmm0
vmovdqa 400(%rsp), %xmm1
vmovdqa 416(%rsp), %xmm2
vmovdqa 432(%rsp), %xmm3
vmovdqa 448(%rsp), %xmm4
vmovdqa 464(%rsp), %xmm5
vmovdqa 480(%rsp), %xmm6
vmovdqa 496(%rsp), %xmm7
vpunpckldq %xmm1, %xmm0, %xmm8
vpunpckldq %xmm3, %xmm2, %xmm9
vpunpckhdq %xmm1, %xmm0, %xmm12
vpunpckhdq %xmm3, %xmm2, %xmm13
vpunpckldq %xmm5, %xmm4, %xmm10
vpunpckldq %xmm7, %xmm6, %xmm11
vpunpckhdq %xmm5, %xmm4, %xmm14
vpunpckhdq %xmm7, %xmm6, %xmm15
vpunpcklqdq %xmm9, %xmm8, %xmm0
vpunpcklqdq %xmm11, %xmm10, %xmm1
vpunpckhqdq %xmm9, %xmm8, %xmm2
vpunpckhqdq %xmm11, %xmm10, %xmm3
vpunpcklqdq %xmm13, %xmm12, %xmm4
vpunpcklqdq %xmm15, %xmm14, %xmm5
vpunpckhqdq %xmm13, %xmm12, %xmm6
vpunpckhqdq %xmm15, %xmm14, %xmm7
vpxor 32(%rsi), %xmm0, %xmm0
vpxor 48(%rsi), %xmm1, %xmm1
vpxor 96(%rsi), %xmm2, %xmm2
vpxor 112(%rsi), %xmm3, %xmm3
vpxor 160(%rsi), %xmm4, %xmm4
vpxor 176(%rsi), %xmm5, %xmm5
vpxor 224(%rsi), %xmm6, %xmm6
vpxor 240(%rsi), %xmm7, %xmm7
vmovdqu %xmm0, 32(%rdx)
vmovdqu %xmm1, 48(%rdx)
vmovdqu %xmm2, 96(%rdx)
vmovdqu %xmm3, 112(%rdx)
vmovdqu %xmm4, 160(%rdx)
vmovdqu %xmm5, 176(%rdx)
vmovdqu %xmm6, 224(%rdx)
vmovdqu %xmm7, 240(%rdx)
addq $256, %rsi
jmp chacha_blocks_avx_mainloop_cont
chacha_blocks_avx_noinput1:
/* Keystream-only variant of the store sequence above */
vmovdqu %xmm0, 0(%rdx)
vmovdqu %xmm1, 16(%rdx)
vmovdqu %xmm2, 64(%rdx)
vmovdqu %xmm3, 80(%rdx)
vmovdqu %xmm4, 128(%rdx)
vmovdqu %xmm5, 144(%rdx)
vmovdqu %xmm6, 192(%rdx)
vmovdqu %xmm7, 208(%rdx)
vmovdqa 384(%rsp), %xmm0
vmovdqa 400(%rsp), %xmm1
vmovdqa 416(%rsp), %xmm2
vmovdqa 432(%rsp), %xmm3
vmovdqa 448(%rsp), %xmm4
vmovdqa 464(%rsp), %xmm5
vmovdqa 480(%rsp), %xmm6
vmovdqa 496(%rsp), %xmm7
vpunpckldq %xmm1, %xmm0, %xmm8
vpunpckldq %xmm3, %xmm2, %xmm9
vpunpckhdq %xmm1, %xmm0, %xmm12
vpunpckhdq %xmm3, %xmm2, %xmm13
vpunpckldq %xmm5, %xmm4, %xmm10
vpunpckldq %xmm7, %xmm6, %xmm11
vpunpckhdq %xmm5, %xmm4, %xmm14
vpunpckhdq %xmm7, %xmm6, %xmm15
vpunpcklqdq %xmm9, %xmm8, %xmm0
vpunpcklqdq %xmm11, %xmm10, %xmm1
vpunpckhqdq %xmm9, %xmm8, %xmm2
vpunpckhqdq %xmm11, %xmm10, %xmm3
vpunpcklqdq %xmm13, %xmm12, %xmm4
vpunpcklqdq %xmm15, %xmm14, %xmm5
vpunpckhqdq %xmm13, %xmm12, %xmm6
vpunpckhqdq %xmm15, %xmm14, %xmm7
vmovdqu %xmm0, 32(%rdx)
vmovdqu %xmm1, 48(%rdx)
vmovdqu %xmm2, 96(%rdx)
vmovdqu %xmm3, 112(%rdx)
vmovdqu %xmm4, 160(%rdx)
vmovdqu %xmm5, 176(%rdx)
vmovdqu %xmm6, 224(%rdx)
vmovdqu %xmm7, 240(%rdx)
chacha_blocks_avx_mainloop_cont:
addq $256, %rdx
subq $256, %rcx
cmp $256, %rcx
jae chacha_blocks_avx_atleast256
/* Restore single-block state for the tail */
vmovdqa 80(%rsp), %xmm6
vmovdqa 96(%rsp), %xmm7
vmovdqa 0(%rsp), %xmm8
vmovdqa 16(%rsp), %xmm9
vmovdqa 32(%rsp), %xmm10
vmovdqa 48(%rsp), %xmm11
movq $1, %r9
chacha_blocks_avx_below256:
vmovq %r9, %xmm5
andq %rcx, %rcx
jz chacha_blocks_avx_done
cmpq $64, %rcx
jae chacha_blocks_avx_above63
/* Partial block: stage the input through the aligned stack buffer */
movq %rdx, %r9
andq %rsi, %rsi
jz chacha_blocks_avx_noinput2
movq %rcx, %r10
movq %rsp, %rdx
addq %r10, %rsi
addq %r10, %rdx
negq %r10
chacha_blocks_avx_copyinput:
movb (%rsi, %r10), %al
movb %al, (%rdx, %r10)
incq %r10
jnz chacha_blocks_avx_copyinput
movq %rsp, %rsi
chacha_blocks_avx_noinput2:
movq %rsp, %rdx
chacha_blocks_avx_above63:
vmovdqa %xmm8, %xmm0
vmovdqa %xmm9, %xmm1
vmovdqa %xmm10, %xmm2
vmovdqa %xmm11, %xmm3
movq 64(%rsp), %rax
chacha_blocks_avx_mainloop2:
/* Single-block double round; rax counts down rounds by 2 */
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm6, %xmm3, %xmm3
vpaddd %xmm2, %xmm3, %xmm2
vpxor %xmm1, %xmm2, %xmm1
vpslld $12, %xmm1, %xmm4
vpsrld $20, %xmm1, %xmm1
vpxor %xmm1, %xmm4, %xmm1
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm7, %xmm3, %xmm3
vpshufd $0x93, %xmm0, %xmm0
vpaddd %xmm2, %xmm3, %xmm2
vpshufd $0x4e, %xmm3, %xmm3
vpxor %xmm1, %xmm2, %xmm1
vpshufd $0x39, %xmm2, %xmm2
vpslld $7, %xmm1, %xmm4
vpsrld $25, %xmm1, %xmm1
vpxor %xmm1, %xmm4, %xmm1
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm6, %xmm3, %xmm3
vpaddd %xmm2, %xmm3, %xmm2
vpxor %xmm1, %xmm2, %xmm1
vpslld $12, %xmm1, %xmm4
vpsrld $20, %xmm1, %xmm1
vpxor %xmm1, %xmm4, %xmm1
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm7, %xmm3, %xmm3
vpshufd $0x39, %xmm0, %xmm0
vpaddd %xmm2, %xmm3, %xmm2
vpshufd $0x4e, %xmm3, %xmm3
vpxor %xmm1, %xmm2, %xmm1
vpshufd $0x93, %xmm2, %xmm2
vpslld $7, %xmm1, %xmm4
vpsrld $25, %xmm1, %xmm1
vpxor %xmm1, %xmm4, %xmm1
subq $2, %rax
jnz chacha_blocks_avx_mainloop2
vpaddd %xmm0, %xmm8, %xmm0
vpaddd %xmm1, %xmm9, %xmm1
vpaddd %xmm2, %xmm10, %xmm2
vpaddd %xmm3, %xmm11, %xmm3
andq %rsi, %rsi
jz chacha_blocks_avx_noinput3
vpxor 0(%rsi), %xmm0, %xmm0
vpxor 16(%rsi), %xmm1, %xmm1
vpxor 32(%rsi), %xmm2, %xmm2
vpxor 48(%rsi), %xmm3, %xmm3
addq $64, %rsi
chacha_blocks_avx_noinput3:
vmovdqu %xmm0, 0(%rdx)
vmovdqu %xmm1, 16(%rdx)
vmovdqu %xmm2, 32(%rdx)
vmovdqu %xmm3, 48(%rdx)
/* Increment the 64-bit block counter held in xmm11 */
vpaddq %xmm11, %xmm5, %xmm11
cmpq $64, %rcx
jbe chacha_blocks_avx_mainloop2_finishup
addq $64, %rdx
subq $64, %rcx
jmp chacha_blocks_avx_below256
chacha_blocks_avx_mainloop2_finishup:
cmpq $64, %rcx
je chacha_blocks_avx_done
/* Copy the rcx tail bytes from the stack buffer to the real output */
addq %rcx, %r9
addq %rcx, %rdx
negq %rcx
chacha_blocks_avx_copyoutput:
movb (%rdx, %rcx), %al
movb %al, (%r9, %rcx)
incq %rcx
jnz chacha_blocks_avx_copyoutput
chacha_blocks_avx_done:
/* Persist the updated counter/iv words back into the caller's state */
vmovdqu %xmm11, 32(%rdi)
movq %rbp, %rsp
popq %rbp
popq %rbx
ret
FN_END chacha_blocks_avx
+
/*
 * hchacha_avx(key, in, out, rounds)
 *   rdi = 32-byte key, rsi = 16-byte input block, rdx = 32-byte output,
 *   ecx = round count (decremented by 2 per double-round iteration).
 * Runs the chacha core on (constants, key, input) and stores the first
 * and last rows (xmm0, xmm3) — the HChaCha subkey derivation.
 */
GLOBAL_HIDDEN_FN hchacha_avx
hchacha_avx_local:
LOAD_VAR_PIC chacha_constants, %rax
vmovdqa 0(%rax), %xmm0
vmovdqa 16(%rax), %xmm6
vmovdqa 32(%rax), %xmm5
vmovdqu 0(%rdi), %xmm1
vmovdqu 16(%rdi), %xmm2
vmovdqu 0(%rsi), %xmm3
hhacha_mainloop_avx:
/* One double round (column + diagonal) */
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm6, %xmm3, %xmm3
vpaddd %xmm2, %xmm3, %xmm2
vpxor %xmm1, %xmm2, %xmm1
vpslld $12, %xmm1, %xmm4
vpsrld $20, %xmm1, %xmm1
vpxor %xmm1, %xmm4, %xmm1
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm5, %xmm3, %xmm3
vpaddd %xmm2, %xmm3, %xmm2
vpxor %xmm1, %xmm2, %xmm1
vpslld $7, %xmm1, %xmm4
vpsrld $25, %xmm1, %xmm1
vpshufd $0x93, %xmm0, %xmm0
vpxor %xmm1, %xmm4, %xmm1
vpshufd $0x4e, %xmm3, %xmm3
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm6, %xmm3, %xmm3
vpshufd $0x39, %xmm2, %xmm2
vpaddd %xmm2, %xmm3, %xmm2
vpxor %xmm1, %xmm2, %xmm1
vpslld $12, %xmm1, %xmm4
vpsrld $20, %xmm1, %xmm1
vpxor %xmm1, %xmm4, %xmm1
vpaddd %xmm0, %xmm1, %xmm0
vpxor %xmm3, %xmm0, %xmm3
vpshufb %xmm5, %xmm3, %xmm3
vpaddd %xmm2, %xmm3, %xmm2
vpxor %xmm1, %xmm2, %xmm1
vpshufd $0x39, %xmm0, %xmm0
vpslld $7, %xmm1, %xmm4
vpshufd $0x4e, %xmm3, %xmm3
vpsrld $25, %xmm1, %xmm1
vpshufd $0x93, %xmm2, %xmm2
vpxor %xmm1, %xmm4, %xmm1
subl $2, %ecx
jne hhacha_mainloop_avx
/* No final state addition in hchacha: store rows 0 and 3 directly */
vmovdqu %xmm0, (%rdx)
vmovdqu %xmm3, 16(%rdx)
ret
FN_END hchacha_avx
+
/*
 * chacha_avx(key, iv, in, out, len, rounds)
 *   rdi = 32-byte key, rsi = 8-byte iv, rdx = in, rcx = out,
 *   r8 = length, r9 = rounds.
 * Builds a 64-byte state on a 64-byte-aligned stack frame:
 *   [0..31] key, [32..39] block counter = 0, [40..47] iv, [48..] rounds,
 * delegates to chacha_blocks_avx_local, then wipes the stack state
 * before returning (keys must not linger on the stack).
 */
GLOBAL_HIDDEN_FN_EXT chacha_avx, 6, 16
pushq %rbp
movq %rsp, %rbp
subq $64, %rsp
andq $~63, %rsp
vmovdqu 0(%rdi), %xmm0
vmovdqu 16(%rdi), %xmm1
vmovdqa %xmm0, 0(%rsp)
vmovdqa %xmm1, 16(%rsp)
xorq %rdi, %rdi
movq %rdi, 32(%rsp)
movq 0(%rsi), %rsi
movq %rsi, 40(%rsp)
movq %r9, 48(%rsp)
/* Shuffle args into chacha_blocks(state, in, out, len) registers */
movq %rsp, %rdi
movq %rdx, %rsi
movq %rcx, %rdx
movq %r8, %rcx
call chacha_blocks_avx_local
/* Zero the on-stack key/state material */
vpxor %xmm0, %xmm0, %xmm0
vmovdqa %xmm0, 0(%rsp)
vmovdqa %xmm0, 16(%rsp)
vmovdqa %xmm0, 32(%rsp)
movq %rbp, %rsp
popq %rbp
ret
FN_END chacha_avx
+
/*
 * xchacha_avx(key, iv, in, out, len, rounds)
 * XChaCha: first derives a subkey with hchacha_avx_local from the key and
 * the first 16 iv bytes (written to state[0..31] in the stack frame at
 * rbx), sets the counter to 0 and the remaining iv bytes (16..23) as the
 * nonce, then runs chacha_blocks_avx_local and wipes the stack state.
 */
GLOBAL_HIDDEN_FN_EXT xchacha_avx, 6, 16
pushq %rbp
pushq %rbx
movq %rsp, %rbp
subq $64, %rsp
andq $~63, %rsp
movq %rsp, %rbx
xorq %rax, %rax
movq %rax, 32(%rbx)
movq 16(%rsi), %rax
movq %rax, 40(%rbx)
movq %r9, 48(%rbx)
/* Preserve in/out/len across the hchacha call */
pushq %rdx
pushq %rcx
pushq %r8
movq %rbx, %rdx
movq %r9, %rcx
call hchacha_avx_local
movq %rbx, %rdi
/* Pops restore: rcx = len (r8), rdx = out (rcx), rsi = in (rdx) */
popq %rcx
popq %rdx
popq %rsi
call chacha_blocks_avx_local
/* Zero the on-stack key/state material */
vpxor %xmm0, %xmm0, %xmm0
vmovdqa %xmm0, 0(%rbx)
vmovdqa %xmm0, 16(%rbx)
vmovdqa %xmm0, 32(%rbx)
movq %rbp, %rsp
popq %rbx
popq %rbp
ret
FN_END xchacha_avx
diff --git a/src/libcryptobox/chacha20/avx2.S b/src/libcryptobox/chacha20/avx2.S
new file mode 100644
index 0000000..efd0f54
--- /dev/null
+++ b/src/libcryptobox/chacha20/avx2.S
@@ -0,0 +1,1018 @@
+#include "../macro.S"
+#include "constants.S"
+SECTION_TEXT
+
+GLOBAL_HIDDEN_FN chacha_blocks_avx2
+chacha_blocks_avx2_local: # rdi=state (key|counter|iv|rounds), rsi=in (0 => keystream only), rdx=out, rcx=bytes
+pushq %rbx
+pushq %rbp
+pushq %r12
+pushq %r13
+pushq %r14
+movq %rsp, %rbp
+andq $~63, %rsp # 64-byte-align, then carve a 512-byte scratch frame
+subq $512, %rsp
+LOAD_VAR_PIC chacha_constants, %rax
+vmovdqa 0(%rax), %xmm8 # "expand 32-byte k" sigma constant (presumably; defined in constants.S)
+vmovdqa 16(%rax), %xmm6 # byte-shuffle mask for rotl16
+vmovdqa 32(%rax), %xmm7 # byte-shuffle mask for rotl8
+vmovdqu 0(%rdi), %xmm9 # state: key[0..15]
+vmovdqu 16(%rdi), %xmm10 # state: key[16..31]
+vmovdqu 32(%rdi), %xmm11 # state: counter + iv
+movq 48(%rdi), %rax # rax = round count
+movq $1, %r9 # counter increment for the 1-block tail path
+vmovdqa %xmm8, 0(%rsp) # stack layout: 0=constants, 16/32=key, 48=counter+iv, 64=rounds
+vmovdqa %xmm9, 16(%rsp)
+vmovdqa %xmm10, 32(%rsp)
+vmovdqa %xmm11, 48(%rsp)
+movq %rax, 64(%rsp)
+vmovdqa %xmm6, 448(%rsp) # rot16 mask duplicated to both 128-bit lanes for ymm vpshufb
+vmovdqa %xmm6, 464(%rsp)
+vmovdqa %xmm7, 480(%rsp) # rot8 mask, likewise
+vmovdqa %xmm7, 496(%rsp)
+cmpq $512, %rcx # dispatch on remaining length: 8-block, 4-block, or 1-block path
+jae chacha_blocks_avx2_atleast512
+cmp $256, %rcx
+jae chacha_blocks_avx2_atleast256
+jmp chacha_blocks_avx2_below256
+.p2align 6,,63
+chacha_blocks_avx2_atleast512: # 8 blocks (512 bytes) per pass; state dword-sliced across 8 ymm lanes
+movq 48(%rsp), %rax
+leaq 1(%rax), %r8 # materialize 8 consecutive 64-bit block counters
+leaq 2(%rax), %r9
+leaq 3(%rax), %r10
+leaq 4(%rax), %rbx
+leaq 5(%rax), %r11
+leaq 6(%rax), %r12
+leaq 7(%rax), %r13
+leaq 8(%rax), %r14
+movl %eax, 128(%rsp) # counter low dwords -> 128(%rsp)
+movl %r8d, 4+128(%rsp)
+movl %r9d, 8+128(%rsp)
+movl %r10d, 12+128(%rsp)
+movl %ebx, 16+128(%rsp)
+movl %r11d, 20+128(%rsp)
+movl %r12d, 24+128(%rsp)
+movl %r13d, 28+128(%rsp)
+shrq $32, %rax # counter high dwords -> 160(%rsp)
+shrq $32, %r8
+shrq $32, %r9
+shrq $32, %r10
+shrq $32, %rbx
+shrq $32, %r11
+shrq $32, %r12
+shrq $32, %r13
+movl %eax, 160(%rsp)
+movl %r8d, 4+160(%rsp)
+movl %r9d, 8+160(%rsp)
+movl %r10d, 12+160(%rsp)
+movl %ebx, 16+160(%rsp)
+movl %r11d, 20+160(%rsp)
+movl %r12d, 24+160(%rsp)
+movl %r13d, 28+160(%rsp)
+movq %r14, 48(%rsp) # advance saved counter by 8 blocks
+movq 64(%rsp), %rax # rax = rounds (loop counter)
+vpbroadcastd 0(%rsp), %ymm0 # ymm0..15 = state words 0..15, each broadcast across 8 lanes
+vpbroadcastd 4+0(%rsp), %ymm1
+vpbroadcastd 8+0(%rsp), %ymm2
+vpbroadcastd 12+0(%rsp), %ymm3
+vpbroadcastd 16(%rsp), %ymm4
+vpbroadcastd 4+16(%rsp), %ymm5
+vpbroadcastd 8+16(%rsp), %ymm6
+vpbroadcastd 12+16(%rsp), %ymm7
+vpbroadcastd 32(%rsp), %ymm8
+vpbroadcastd 4+32(%rsp), %ymm9
+vpbroadcastd 8+32(%rsp), %ymm10
+vpbroadcastd 12+32(%rsp), %ymm11
+vpbroadcastd 8+48(%rsp), %ymm14
+vpbroadcastd 12+48(%rsp), %ymm15
+vmovdqa 128(%rsp), %ymm12 # words 12/13 = per-lane counters (differ per block)
+vmovdqa 160(%rsp), %ymm13
+chacha_blocks_avx2_mainloop1: # one iteration = column round + diagonal round (2 rounds)
+vpaddd %ymm0, %ymm4, %ymm0
+vpaddd %ymm1, %ymm5, %ymm1
+vpxor %ymm12, %ymm0, %ymm12
+vpxor %ymm13, %ymm1, %ymm13
+vpaddd %ymm2, %ymm6, %ymm2
+vpaddd %ymm3, %ymm7, %ymm3
+vpxor %ymm14, %ymm2, %ymm14
+vpxor %ymm15, %ymm3, %ymm15
+vpshufb 448(%rsp), %ymm12, %ymm12 # rotl16 via byte shuffle
+vpshufb 448(%rsp), %ymm13, %ymm13
+vpaddd %ymm8, %ymm12, %ymm8
+vpaddd %ymm9, %ymm13, %ymm9
+vpshufb 448(%rsp), %ymm14, %ymm14
+vpshufb 448(%rsp), %ymm15, %ymm15
+vpaddd %ymm10, %ymm14, %ymm10
+vpaddd %ymm11, %ymm15, %ymm11
+vmovdqa %ymm12, 96(%rsp) # spill: 16 state words + temp need 17 regs, only 16 exist
+vpxor %ymm4, %ymm8, %ymm4
+vpxor %ymm5, %ymm9, %ymm5
+vpslld $ 12, %ymm4, %ymm12 # rotl12 = (x << 12) ^ (x >> 20)
+vpsrld $20, %ymm4, %ymm4
+vpxor %ymm4, %ymm12, %ymm4
+vpslld $ 12, %ymm5, %ymm12
+vpsrld $20, %ymm5, %ymm5
+vpxor %ymm5, %ymm12, %ymm5
+vpxor %ymm6, %ymm10, %ymm6
+vpxor %ymm7, %ymm11, %ymm7
+vpslld $ 12, %ymm6, %ymm12
+vpsrld $20, %ymm6, %ymm6
+vpxor %ymm6, %ymm12, %ymm6
+vpslld $ 12, %ymm7, %ymm12
+vpsrld $20, %ymm7, %ymm7
+vpxor %ymm7, %ymm12, %ymm7
+vpaddd %ymm0, %ymm4, %ymm0
+vpaddd %ymm1, %ymm5, %ymm1
+vpxor 96(%rsp), %ymm0, %ymm12 # reload spilled word straight into the xor
+vpxor %ymm13, %ymm1, %ymm13
+vpaddd %ymm2, %ymm6, %ymm2
+vpaddd %ymm3, %ymm7, %ymm3
+vpxor %ymm14, %ymm2, %ymm14
+vpxor %ymm15, %ymm3, %ymm15
+vpshufb 480(%rsp), %ymm12, %ymm12 # rotl8 via byte shuffle
+vpshufb 480(%rsp), %ymm13, %ymm13
+vpaddd %ymm8, %ymm12, %ymm8
+vpaddd %ymm9, %ymm13, %ymm9
+vpshufb 480(%rsp), %ymm14, %ymm14
+vpshufb 480(%rsp), %ymm15, %ymm15
+vpaddd %ymm10, %ymm14, %ymm10
+vpaddd %ymm11, %ymm15, %ymm11
+vmovdqa %ymm12, 96(%rsp)
+vpxor %ymm4, %ymm8, %ymm4
+vpxor %ymm5, %ymm9, %ymm5
+vpslld $ 7, %ymm4, %ymm12 # rotl7 = (x << 7) ^ (x >> 25)
+vpsrld $25, %ymm4, %ymm4
+vpxor %ymm4, %ymm12, %ymm4
+vpslld $ 7, %ymm5, %ymm12
+vpsrld $25, %ymm5, %ymm5
+vpxor %ymm5, %ymm12, %ymm5
+vpxor %ymm6, %ymm10, %ymm6
+vpxor %ymm7, %ymm11, %ymm7
+vpslld $ 7, %ymm6, %ymm12
+vpsrld $25, %ymm6, %ymm6
+vpxor %ymm6, %ymm12, %ymm6
+vpslld $ 7, %ymm7, %ymm12
+vpsrld $25, %ymm7, %ymm7
+vpxor %ymm7, %ymm12, %ymm7
+vpaddd %ymm0, %ymm5, %ymm0 # diagonal round: same quarterround, rotated operand pattern
+vpaddd %ymm1, %ymm6, %ymm1
+vpxor %ymm15, %ymm0, %ymm15
+vpxor 96(%rsp), %ymm1, %ymm12
+vpaddd %ymm2, %ymm7, %ymm2
+vpaddd %ymm3, %ymm4, %ymm3
+vpxor %ymm13, %ymm2, %ymm13
+vpxor %ymm14, %ymm3, %ymm14
+vpshufb 448(%rsp), %ymm15, %ymm15
+vpshufb 448(%rsp), %ymm12, %ymm12
+vpaddd %ymm10, %ymm15, %ymm10
+vpaddd %ymm11, %ymm12, %ymm11
+vpshufb 448(%rsp), %ymm13, %ymm13
+vpshufb 448(%rsp), %ymm14, %ymm14
+vpaddd %ymm8, %ymm13, %ymm8
+vpaddd %ymm9, %ymm14, %ymm9
+vmovdqa %ymm15, 96(%rsp)
+vpxor %ymm5, %ymm10, %ymm5
+vpxor %ymm6, %ymm11, %ymm6
+vpslld $ 12, %ymm5, %ymm15
+vpsrld $20, %ymm5, %ymm5
+vpxor %ymm5, %ymm15, %ymm5
+vpslld $ 12, %ymm6, %ymm15
+vpsrld $20, %ymm6, %ymm6
+vpxor %ymm6, %ymm15, %ymm6
+vpxor %ymm7, %ymm8, %ymm7
+vpxor %ymm4, %ymm9, %ymm4
+vpslld $ 12, %ymm7, %ymm15
+vpsrld $20, %ymm7, %ymm7
+vpxor %ymm7, %ymm15, %ymm7
+vpslld $ 12, %ymm4, %ymm15
+vpsrld $20, %ymm4, %ymm4
+vpxor %ymm4, %ymm15, %ymm4
+vpaddd %ymm0, %ymm5, %ymm0
+vpaddd %ymm1, %ymm6, %ymm1
+vpxor 96(%rsp), %ymm0, %ymm15
+vpxor %ymm12, %ymm1, %ymm12
+vpaddd %ymm2, %ymm7, %ymm2
+vpaddd %ymm3, %ymm4, %ymm3
+vpxor %ymm13, %ymm2, %ymm13
+vpxor %ymm14, %ymm3, %ymm14
+vpshufb 480(%rsp), %ymm15, %ymm15
+vpshufb 480(%rsp), %ymm12, %ymm12
+vpaddd %ymm10, %ymm15, %ymm10
+vpaddd %ymm11, %ymm12, %ymm11
+vpshufb 480(%rsp), %ymm13, %ymm13
+vpshufb 480(%rsp), %ymm14, %ymm14
+vpaddd %ymm8, %ymm13, %ymm8
+vpaddd %ymm9, %ymm14, %ymm9
+vmovdqa %ymm15, 96(%rsp)
+vpxor %ymm5, %ymm10, %ymm5
+vpxor %ymm6, %ymm11, %ymm6
+vpslld $ 7, %ymm5, %ymm15
+vpsrld $25, %ymm5, %ymm5
+vpxor %ymm5, %ymm15, %ymm5
+vpslld $ 7, %ymm6, %ymm15
+vpsrld $25, %ymm6, %ymm6
+vpxor %ymm6, %ymm15, %ymm6
+vpxor %ymm7, %ymm8, %ymm7
+vpxor %ymm4, %ymm9, %ymm4
+vpslld $ 7, %ymm7, %ymm15
+vpsrld $25, %ymm7, %ymm7
+vpxor %ymm7, %ymm15, %ymm7
+vpslld $ 7, %ymm4, %ymm15
+vpsrld $25, %ymm4, %ymm4
+vpxor %ymm4, %ymm15, %ymm4
+vmovdqa 96(%rsp), %ymm15
+subq $2, %rax # two rounds consumed per pass
+jnz chacha_blocks_avx2_mainloop1
+vmovdqa %ymm8, 192(%rsp) # park words 8..15; finalize words 0..7 first
+vmovdqa %ymm9, 224(%rsp)
+vmovdqa %ymm10, 256(%rsp)
+vmovdqa %ymm11, 288(%rsp)
+vmovdqa %ymm12, 320(%rsp)
+vmovdqa %ymm13, 352(%rsp)
+vmovdqa %ymm14, 384(%rsp)
+vmovdqa %ymm15, 416(%rsp)
+vpbroadcastd 0(%rsp), %ymm8 # re-add initial state words 0..7
+vpbroadcastd 4+0(%rsp), %ymm9
+vpbroadcastd 8+0(%rsp), %ymm10
+vpbroadcastd 12+0(%rsp), %ymm11
+vpbroadcastd 16(%rsp), %ymm12
+vpbroadcastd 4+16(%rsp), %ymm13
+vpbroadcastd 8+16(%rsp), %ymm14
+vpbroadcastd 12+16(%rsp), %ymm15
+vpaddd %ymm8, %ymm0, %ymm0
+vpaddd %ymm9, %ymm1, %ymm1
+vpaddd %ymm10, %ymm2, %ymm2
+vpaddd %ymm11, %ymm3, %ymm3
+vpaddd %ymm12, %ymm4, %ymm4
+vpaddd %ymm13, %ymm5, %ymm5
+vpaddd %ymm14, %ymm6, %ymm6
+vpaddd %ymm15, %ymm7, %ymm7
+vpunpckldq %ymm1, %ymm0, %ymm8 # transpose dword-sliced lanes back into sequential blocks
+vpunpckldq %ymm3, %ymm2, %ymm9
+vpunpckhdq %ymm1, %ymm0, %ymm12
+vpunpckhdq %ymm3, %ymm2, %ymm13
+vpunpckldq %ymm5, %ymm4, %ymm10
+vpunpckldq %ymm7, %ymm6, %ymm11
+vpunpckhdq %ymm5, %ymm4, %ymm14
+vpunpckhdq %ymm7, %ymm6, %ymm15
+vpunpcklqdq %ymm9, %ymm8, %ymm0
+vpunpcklqdq %ymm11, %ymm10, %ymm1
+vpunpckhqdq %ymm9, %ymm8, %ymm2
+vpunpckhqdq %ymm11, %ymm10, %ymm3
+vpunpcklqdq %ymm13, %ymm12, %ymm4
+vpunpcklqdq %ymm15, %ymm14, %ymm5
+vpunpckhqdq %ymm13, %ymm12, %ymm6
+vpunpckhqdq %ymm15, %ymm14, %ymm7
+vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 # merge 128-bit halves: low lanes = blocks 0..3, high = 4..7
+vperm2i128 $0x20, %ymm3, %ymm2, %ymm9
+vperm2i128 $0x31, %ymm1, %ymm0, %ymm12
+vperm2i128 $0x31, %ymm3, %ymm2, %ymm13
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10
+vperm2i128 $0x20, %ymm7, %ymm6, %ymm11
+vperm2i128 $0x31, %ymm5, %ymm4, %ymm14
+vperm2i128 $0x31, %ymm7, %ymm6, %ymm15
+andq %rsi, %rsi # in == NULL? then emit raw keystream
+jz chacha_blocks_avx2_noinput1
+vpxor 0(%rsi), %ymm8, %ymm8 # xor first half of each block (stride 64)
+vpxor 64(%rsi), %ymm9, %ymm9
+vpxor 128(%rsi), %ymm10, %ymm10
+vpxor 192(%rsi), %ymm11, %ymm11
+vpxor 256(%rsi), %ymm12, %ymm12
+vpxor 320(%rsi), %ymm13, %ymm13
+vpxor 384(%rsi), %ymm14, %ymm14
+vpxor 448(%rsi), %ymm15, %ymm15
+vmovdqu %ymm8, 0(%rdx)
+vmovdqu %ymm9, 64(%rdx)
+vmovdqu %ymm10, 128(%rdx)
+vmovdqu %ymm11, 192(%rdx)
+vmovdqu %ymm12, 256(%rdx)
+vmovdqu %ymm13, 320(%rdx)
+vmovdqu %ymm14, 384(%rdx)
+vmovdqu %ymm15, 448(%rdx)
+vmovdqa 192(%rsp), %ymm0 # now finalize parked words 8..15 the same way
+vmovdqa 224(%rsp), %ymm1
+vmovdqa 256(%rsp), %ymm2
+vmovdqa 288(%rsp), %ymm3
+vmovdqa 320(%rsp), %ymm4
+vmovdqa 352(%rsp), %ymm5
+vmovdqa 384(%rsp), %ymm6
+vmovdqa 416(%rsp), %ymm7
+vpbroadcastd 32(%rsp), %ymm8
+vpbroadcastd 4+32(%rsp), %ymm9
+vpbroadcastd 8+32(%rsp), %ymm10
+vpbroadcastd 12+32(%rsp), %ymm11
+vmovdqa 128(%rsp), %ymm12 # per-lane counters re-added for words 12/13
+vmovdqa 160(%rsp), %ymm13
+vpbroadcastd 8+48(%rsp), %ymm14
+vpbroadcastd 12+48(%rsp), %ymm15
+vpaddd %ymm8, %ymm0, %ymm0
+vpaddd %ymm9, %ymm1, %ymm1
+vpaddd %ymm10, %ymm2, %ymm2
+vpaddd %ymm11, %ymm3, %ymm3
+vpaddd %ymm12, %ymm4, %ymm4
+vpaddd %ymm13, %ymm5, %ymm5
+vpaddd %ymm14, %ymm6, %ymm6
+vpaddd %ymm15, %ymm7, %ymm7
+vpunpckldq %ymm1, %ymm0, %ymm8
+vpunpckldq %ymm3, %ymm2, %ymm9
+vpunpckhdq %ymm1, %ymm0, %ymm12
+vpunpckhdq %ymm3, %ymm2, %ymm13
+vpunpckldq %ymm5, %ymm4, %ymm10
+vpunpckldq %ymm7, %ymm6, %ymm11
+vpunpckhdq %ymm5, %ymm4, %ymm14
+vpunpckhdq %ymm7, %ymm6, %ymm15
+vpunpcklqdq %ymm9, %ymm8, %ymm0
+vpunpcklqdq %ymm11, %ymm10, %ymm1
+vpunpckhqdq %ymm9, %ymm8, %ymm2
+vpunpckhqdq %ymm11, %ymm10, %ymm3
+vpunpcklqdq %ymm13, %ymm12, %ymm4
+vpunpcklqdq %ymm15, %ymm14, %ymm5
+vpunpckhqdq %ymm13, %ymm12, %ymm6
+vpunpckhqdq %ymm15, %ymm14, %ymm7
+vperm2i128 $0x20, %ymm1, %ymm0, %ymm8
+vperm2i128 $0x20, %ymm3, %ymm2, %ymm9
+vperm2i128 $0x31, %ymm1, %ymm0, %ymm12
+vperm2i128 $0x31, %ymm3, %ymm2, %ymm13
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10
+vperm2i128 $0x20, %ymm7, %ymm6, %ymm11
+vperm2i128 $0x31, %ymm5, %ymm4, %ymm14
+vperm2i128 $0x31, %ymm7, %ymm6, %ymm15
+vpxor 32(%rsi), %ymm8, %ymm8 # second half of each block (offset 32, stride 64)
+vpxor 96(%rsi), %ymm9, %ymm9
+vpxor 160(%rsi), %ymm10, %ymm10
+vpxor 224(%rsi), %ymm11, %ymm11
+vpxor 288(%rsi), %ymm12, %ymm12
+vpxor 352(%rsi), %ymm13, %ymm13
+vpxor 416(%rsi), %ymm14, %ymm14
+vpxor 480(%rsi), %ymm15, %ymm15
+vmovdqu %ymm8, 32(%rdx)
+vmovdqu %ymm9, 96(%rdx)
+vmovdqu %ymm10, 160(%rdx)
+vmovdqu %ymm11, 224(%rdx)
+vmovdqu %ymm12, 288(%rdx)
+vmovdqu %ymm13, 352(%rdx)
+vmovdqu %ymm14, 416(%rdx)
+vmovdqu %ymm15, 480(%rdx)
+addq $512, %rsi
+jmp chacha_blocks_avx2_mainloop1_cont
+chacha_blocks_avx2_noinput1: # keystream-only variant of the store sequence above
+vmovdqu %ymm8, 0(%rdx)
+vmovdqu %ymm9, 64(%rdx)
+vmovdqu %ymm10, 128(%rdx)
+vmovdqu %ymm11, 192(%rdx)
+vmovdqu %ymm12, 256(%rdx)
+vmovdqu %ymm13, 320(%rdx)
+vmovdqu %ymm14, 384(%rdx)
+vmovdqu %ymm15, 448(%rdx)
+vmovdqa 192(%rsp), %ymm0
+vmovdqa 224(%rsp), %ymm1
+vmovdqa 256(%rsp), %ymm2
+vmovdqa 288(%rsp), %ymm3
+vmovdqa 320(%rsp), %ymm4
+vmovdqa 352(%rsp), %ymm5
+vmovdqa 384(%rsp), %ymm6
+vmovdqa 416(%rsp), %ymm7
+vpbroadcastd 32(%rsp), %ymm8
+vpbroadcastd 4+32(%rsp), %ymm9
+vpbroadcastd 8+32(%rsp), %ymm10
+vpbroadcastd 12+32(%rsp), %ymm11
+vmovdqa 128(%rsp), %ymm12
+vmovdqa 160(%rsp), %ymm13
+vpbroadcastd 8+48(%rsp), %ymm14
+vpbroadcastd 12+48(%rsp), %ymm15
+vpaddd %ymm8, %ymm0, %ymm0
+vpaddd %ymm9, %ymm1, %ymm1
+vpaddd %ymm10, %ymm2, %ymm2
+vpaddd %ymm11, %ymm3, %ymm3
+vpaddd %ymm12, %ymm4, %ymm4
+vpaddd %ymm13, %ymm5, %ymm5
+vpaddd %ymm14, %ymm6, %ymm6
+vpaddd %ymm15, %ymm7, %ymm7
+vpunpckldq %ymm1, %ymm0, %ymm8
+vpunpckldq %ymm3, %ymm2, %ymm9
+vpunpckhdq %ymm1, %ymm0, %ymm12
+vpunpckhdq %ymm3, %ymm2, %ymm13
+vpunpckldq %ymm5, %ymm4, %ymm10
+vpunpckldq %ymm7, %ymm6, %ymm11
+vpunpckhdq %ymm5, %ymm4, %ymm14
+vpunpckhdq %ymm7, %ymm6, %ymm15
+vpunpcklqdq %ymm9, %ymm8, %ymm0
+vpunpcklqdq %ymm11, %ymm10, %ymm1
+vpunpckhqdq %ymm9, %ymm8, %ymm2
+vpunpckhqdq %ymm11, %ymm10, %ymm3
+vpunpcklqdq %ymm13, %ymm12, %ymm4
+vpunpcklqdq %ymm15, %ymm14, %ymm5
+vpunpckhqdq %ymm13, %ymm12, %ymm6
+vpunpckhqdq %ymm15, %ymm14, %ymm7
+vperm2i128 $0x20, %ymm1, %ymm0, %ymm8
+vperm2i128 $0x20, %ymm3, %ymm2, %ymm9
+vperm2i128 $0x31, %ymm1, %ymm0, %ymm12
+vperm2i128 $0x31, %ymm3, %ymm2, %ymm13
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10
+vperm2i128 $0x20, %ymm7, %ymm6, %ymm11
+vperm2i128 $0x31, %ymm5, %ymm4, %ymm14
+vperm2i128 $0x31, %ymm7, %ymm6, %ymm15
+vmovdqu %ymm8, 32(%rdx)
+vmovdqu %ymm9, 96(%rdx)
+vmovdqu %ymm10, 160(%rdx)
+vmovdqu %ymm11, 224(%rdx)
+vmovdqu %ymm12, 288(%rdx)
+vmovdqu %ymm13, 352(%rdx)
+vmovdqu %ymm14, 416(%rdx)
+vmovdqu %ymm15, 480(%rdx)
+chacha_blocks_avx2_mainloop1_cont: # advance output/remaining and re-dispatch
+addq $512, %rdx
+subq $512, %rcx
+cmp $512, %rcx
+jae chacha_blocks_avx2_atleast512
+cmp $256, %rcx
+jb chacha_blocks_avx2_below256_fixup
+chacha_blocks_avx2_atleast256: # 4 blocks (256 bytes) per pass; same scheme in xmm registers
+movq 48(%rsp), %rax
+leaq 1(%rax), %r8
+leaq 2(%rax), %r9
+leaq 3(%rax), %r10
+leaq 4(%rax), %rbx
+movl %eax, 128(%rsp)
+movl %r8d, 4+128(%rsp)
+movl %r9d, 8+128(%rsp)
+movl %r10d, 12+128(%rsp)
+shrq $32, %rax
+shrq $32, %r8
+shrq $32, %r9
+shrq $32, %r10
+movl %eax, 160(%rsp)
+movl %r8d, 4+160(%rsp)
+movl %r9d, 8+160(%rsp)
+movl %r10d, 12+160(%rsp)
+movq %rbx, 48(%rsp) # counter advanced by 4 blocks
+movq 64(%rsp), %rax
+vpbroadcastd 0(%rsp), %xmm0
+vpbroadcastd 4+0(%rsp), %xmm1
+vpbroadcastd 8+0(%rsp), %xmm2
+vpbroadcastd 12+0(%rsp), %xmm3
+vpbroadcastd 16(%rsp), %xmm4
+vpbroadcastd 4+16(%rsp), %xmm5
+vpbroadcastd 8+16(%rsp), %xmm6
+vpbroadcastd 12+16(%rsp), %xmm7
+vpbroadcastd 32(%rsp), %xmm8
+vpbroadcastd 4+32(%rsp), %xmm9
+vpbroadcastd 8+32(%rsp), %xmm10
+vpbroadcastd 12+32(%rsp), %xmm11
+vmovdqa 128(%rsp), %xmm12
+vmovdqa 160(%rsp), %xmm13
+vpbroadcastd 8+48(%rsp), %xmm14
+vpbroadcastd 12+48(%rsp), %xmm15
+chacha_blocks_avx2_mainloop2: # column + diagonal round per pass (xmm mirror of mainloop1)
+vpaddd %xmm0, %xmm4, %xmm0
+vpaddd %xmm1, %xmm5, %xmm1
+vpxor %xmm12, %xmm0, %xmm12
+vpxor %xmm13, %xmm1, %xmm13
+vpaddd %xmm2, %xmm6, %xmm2
+vpaddd %xmm3, %xmm7, %xmm3
+vpxor %xmm14, %xmm2, %xmm14
+vpxor %xmm15, %xmm3, %xmm15
+vpshufb 448(%rsp), %xmm12, %xmm12 # rotl16
+vpshufb 448(%rsp), %xmm13, %xmm13
+vpaddd %xmm8, %xmm12, %xmm8
+vpaddd %xmm9, %xmm13, %xmm9
+vpshufb 448(%rsp), %xmm14, %xmm14
+vpshufb 448(%rsp), %xmm15, %xmm15
+vpaddd %xmm10, %xmm14, %xmm10
+vpaddd %xmm11, %xmm15, %xmm11
+vmovdqa %xmm12, 96(%rsp) # spill slot, as in mainloop1
+vpxor %xmm4, %xmm8, %xmm4
+vpxor %xmm5, %xmm9, %xmm5
+vpslld $ 12, %xmm4, %xmm12 # rotl12
+vpsrld $20, %xmm4, %xmm4
+vpxor %xmm4, %xmm12, %xmm4
+vpslld $ 12, %xmm5, %xmm12
+vpsrld $20, %xmm5, %xmm5
+vpxor %xmm5, %xmm12, %xmm5
+vpxor %xmm6, %xmm10, %xmm6
+vpxor %xmm7, %xmm11, %xmm7
+vpslld $ 12, %xmm6, %xmm12
+vpsrld $20, %xmm6, %xmm6
+vpxor %xmm6, %xmm12, %xmm6
+vpslld $ 12, %xmm7, %xmm12
+vpsrld $20, %xmm7, %xmm7
+vpxor %xmm7, %xmm12, %xmm7
+vpaddd %xmm0, %xmm4, %xmm0
+vpaddd %xmm1, %xmm5, %xmm1
+vpxor 96(%rsp), %xmm0, %xmm12
+vpxor %xmm13, %xmm1, %xmm13
+vpaddd %xmm2, %xmm6, %xmm2
+vpaddd %xmm3, %xmm7, %xmm3
+vpxor %xmm14, %xmm2, %xmm14
+vpxor %xmm15, %xmm3, %xmm15
+vpshufb 480(%rsp), %xmm12, %xmm12 # rotl8
+vpshufb 480(%rsp), %xmm13, %xmm13
+vpaddd %xmm8, %xmm12, %xmm8
+vpaddd %xmm9, %xmm13, %xmm9
+vpshufb 480(%rsp), %xmm14, %xmm14
+vpshufb 480(%rsp), %xmm15, %xmm15
+vpaddd %xmm10, %xmm14, %xmm10
+vpaddd %xmm11, %xmm15, %xmm11
+vmovdqa %xmm12, 96(%rsp)
+vpxor %xmm4, %xmm8, %xmm4
+vpxor %xmm5, %xmm9, %xmm5
+vpslld $ 7, %xmm4, %xmm12 # rotl7
+vpsrld $25, %xmm4, %xmm4
+vpxor %xmm4, %xmm12, %xmm4
+vpslld $ 7, %xmm5, %xmm12
+vpsrld $25, %xmm5, %xmm5
+vpxor %xmm5, %xmm12, %xmm5
+vpxor %xmm6, %xmm10, %xmm6
+vpxor %xmm7, %xmm11, %xmm7
+vpslld $ 7, %xmm6, %xmm12
+vpsrld $25, %xmm6, %xmm6
+vpxor %xmm6, %xmm12, %xmm6
+vpslld $ 7, %xmm7, %xmm12
+vpsrld $25, %xmm7, %xmm7
+vpxor %xmm7, %xmm12, %xmm7
+vpaddd %xmm0, %xmm5, %xmm0 # diagonal round
+vpaddd %xmm1, %xmm6, %xmm1
+vpxor %xmm15, %xmm0, %xmm15
+vpxor 96(%rsp), %xmm1, %xmm12
+vpaddd %xmm2, %xmm7, %xmm2
+vpaddd %xmm3, %xmm4, %xmm3
+vpxor %xmm13, %xmm2, %xmm13
+vpxor %xmm14, %xmm3, %xmm14
+vpshufb 448(%rsp), %xmm15, %xmm15
+vpshufb 448(%rsp), %xmm12, %xmm12
+vpaddd %xmm10, %xmm15, %xmm10
+vpaddd %xmm11, %xmm12, %xmm11
+vpshufb 448(%rsp), %xmm13, %xmm13
+vpshufb 448(%rsp), %xmm14, %xmm14
+vpaddd %xmm8, %xmm13, %xmm8
+vpaddd %xmm9, %xmm14, %xmm9
+vmovdqa %xmm15, 96(%rsp)
+vpxor %xmm5, %xmm10, %xmm5
+vpxor %xmm6, %xmm11, %xmm6
+vpslld $ 12, %xmm5, %xmm15
+vpsrld $20, %xmm5, %xmm5
+vpxor %xmm5, %xmm15, %xmm5
+vpslld $ 12, %xmm6, %xmm15
+vpsrld $20, %xmm6, %xmm6
+vpxor %xmm6, %xmm15, %xmm6
+vpxor %xmm7, %xmm8, %xmm7
+vpxor %xmm4, %xmm9, %xmm4
+vpslld $ 12, %xmm7, %xmm15
+vpsrld $20, %xmm7, %xmm7
+vpxor %xmm7, %xmm15, %xmm7
+vpslld $ 12, %xmm4, %xmm15
+vpsrld $20, %xmm4, %xmm4
+vpxor %xmm4, %xmm15, %xmm4
+vpaddd %xmm0, %xmm5, %xmm0
+vpaddd %xmm1, %xmm6, %xmm1
+vpxor 96(%rsp), %xmm0, %xmm15
+vpxor %xmm12, %xmm1, %xmm12
+vpaddd %xmm2, %xmm7, %xmm2
+vpaddd %xmm3, %xmm4, %xmm3
+vpxor %xmm13, %xmm2, %xmm13
+vpxor %xmm14, %xmm3, %xmm14
+vpshufb 480(%rsp), %xmm15, %xmm15
+vpshufb 480(%rsp), %xmm12, %xmm12
+vpaddd %xmm10, %xmm15, %xmm10
+vpaddd %xmm11, %xmm12, %xmm11
+vpshufb 480(%rsp), %xmm13, %xmm13
+vpshufb 480(%rsp), %xmm14, %xmm14
+vpaddd %xmm8, %xmm13, %xmm8
+vpaddd %xmm9, %xmm14, %xmm9
+vmovdqa %xmm15, 96(%rsp)
+vpxor %xmm5, %xmm10, %xmm5
+vpxor %xmm6, %xmm11, %xmm6
+vpslld $ 7, %xmm5, %xmm15
+vpsrld $25, %xmm5, %xmm5
+vpxor %xmm5, %xmm15, %xmm5
+vpslld $ 7, %xmm6, %xmm15
+vpsrld $25, %xmm6, %xmm6
+vpxor %xmm6, %xmm15, %xmm6
+vpxor %xmm7, %xmm8, %xmm7
+vpxor %xmm4, %xmm9, %xmm4
+vpslld $ 7, %xmm7, %xmm15
+vpsrld $25, %xmm7, %xmm7
+vpxor %xmm7, %xmm15, %xmm7
+vpslld $ 7, %xmm4, %xmm15
+vpsrld $25, %xmm4, %xmm4
+vpxor %xmm4, %xmm15, %xmm4
+vmovdqa 96(%rsp), %xmm15
+subq $2, %rax
+jnz chacha_blocks_avx2_mainloop2
+vmovdqa %xmm8, 192(%rsp) # park words 8..15, finalize words 0..7
+vmovdqa %xmm9, 208(%rsp)
+vmovdqa %xmm10, 224(%rsp)
+vmovdqa %xmm11, 240(%rsp)
+vmovdqa %xmm12, 256(%rsp)
+vmovdqa %xmm13, 272(%rsp)
+vmovdqa %xmm14, 288(%rsp)
+vmovdqa %xmm15, 304(%rsp)
+vpbroadcastd 0(%rsp), %xmm8
+vpbroadcastd 4+0(%rsp), %xmm9
+vpbroadcastd 8+0(%rsp), %xmm10
+vpbroadcastd 12+0(%rsp), %xmm11
+vpbroadcastd 16(%rsp), %xmm12
+vpbroadcastd 4+16(%rsp), %xmm13
+vpbroadcastd 8+16(%rsp), %xmm14
+vpbroadcastd 12+16(%rsp), %xmm15
+vpaddd %xmm8, %xmm0, %xmm0
+vpaddd %xmm9, %xmm1, %xmm1
+vpaddd %xmm10, %xmm2, %xmm2
+vpaddd %xmm11, %xmm3, %xmm3
+vpaddd %xmm12, %xmm4, %xmm4
+vpaddd %xmm13, %xmm5, %xmm5
+vpaddd %xmm14, %xmm6, %xmm6
+vpaddd %xmm15, %xmm7, %xmm7
+vpunpckldq %xmm1, %xmm0, %xmm8 # 4x4 dword transpose back to sequential block order
+vpunpckldq %xmm3, %xmm2, %xmm9
+vpunpckhdq %xmm1, %xmm0, %xmm12
+vpunpckhdq %xmm3, %xmm2, %xmm13
+vpunpckldq %xmm5, %xmm4, %xmm10
+vpunpckldq %xmm7, %xmm6, %xmm11
+vpunpckhdq %xmm5, %xmm4, %xmm14
+vpunpckhdq %xmm7, %xmm6, %xmm15
+vpunpcklqdq %xmm9, %xmm8, %xmm0
+vpunpcklqdq %xmm11, %xmm10, %xmm1
+vpunpckhqdq %xmm9, %xmm8, %xmm2
+vpunpckhqdq %xmm11, %xmm10, %xmm3
+vpunpcklqdq %xmm13, %xmm12, %xmm4
+vpunpcklqdq %xmm15, %xmm14, %xmm5
+vpunpckhqdq %xmm13, %xmm12, %xmm6
+vpunpckhqdq %xmm15, %xmm14, %xmm7
+andq %rsi, %rsi # in == NULL? keystream only
+jz chacha_blocks_avx2_noinput2
+vpxor 0(%rsi), %xmm0, %xmm0 # first halves of blocks 0..3 (stride 64)
+vpxor 16(%rsi), %xmm1, %xmm1
+vpxor 64(%rsi), %xmm2, %xmm2
+vpxor 80(%rsi), %xmm3, %xmm3
+vpxor 128(%rsi), %xmm4, %xmm4
+vpxor 144(%rsi), %xmm5, %xmm5
+vpxor 192(%rsi), %xmm6, %xmm6
+vpxor 208(%rsi), %xmm7, %xmm7
+vmovdqu %xmm0, 0(%rdx)
+vmovdqu %xmm1, 16(%rdx)
+vmovdqu %xmm2, 64(%rdx)
+vmovdqu %xmm3, 80(%rdx)
+vmovdqu %xmm4, 128(%rdx)
+vmovdqu %xmm5, 144(%rdx)
+vmovdqu %xmm6, 192(%rdx)
+vmovdqu %xmm7, 208(%rdx)
+vmovdqa 192(%rsp), %xmm0 # finalize parked words 8..15
+vmovdqa 208(%rsp), %xmm1
+vmovdqa 224(%rsp), %xmm2
+vmovdqa 240(%rsp), %xmm3
+vmovdqa 256(%rsp), %xmm4
+vmovdqa 272(%rsp), %xmm5
+vmovdqa 288(%rsp), %xmm6
+vmovdqa 304(%rsp), %xmm7
+vpbroadcastd 32(%rsp), %xmm8
+vpbroadcastd 4+32(%rsp), %xmm9
+vpbroadcastd 8+32(%rsp), %xmm10
+vpbroadcastd 12+32(%rsp), %xmm11
+vmovdqa 128(%rsp), %xmm12
+vmovdqa 160(%rsp), %xmm13
+vpbroadcastd 8+48(%rsp), %xmm14
+vpbroadcastd 12+48(%rsp), %xmm15
+vpaddd %xmm8, %xmm0, %xmm0
+vpaddd %xmm9, %xmm1, %xmm1
+vpaddd %xmm10, %xmm2, %xmm2
+vpaddd %xmm11, %xmm3, %xmm3
+vpaddd %xmm12, %xmm4, %xmm4
+vpaddd %xmm13, %xmm5, %xmm5
+vpaddd %xmm14, %xmm6, %xmm6
+vpaddd %xmm15, %xmm7, %xmm7
+vpunpckldq %xmm1, %xmm0, %xmm8
+vpunpckldq %xmm3, %xmm2, %xmm9
+vpunpckhdq %xmm1, %xmm0, %xmm12
+vpunpckhdq %xmm3, %xmm2, %xmm13
+vpunpckldq %xmm5, %xmm4, %xmm10
+vpunpckldq %xmm7, %xmm6, %xmm11
+vpunpckhdq %xmm5, %xmm4, %xmm14
+vpunpckhdq %xmm7, %xmm6, %xmm15
+vpunpcklqdq %xmm9, %xmm8, %xmm0
+vpunpcklqdq %xmm11, %xmm10, %xmm1
+vpunpckhqdq %xmm9, %xmm8, %xmm2
+vpunpckhqdq %xmm11, %xmm10, %xmm3
+vpunpcklqdq %xmm13, %xmm12, %xmm4
+vpunpcklqdq %xmm15, %xmm14, %xmm5
+vpunpckhqdq %xmm13, %xmm12, %xmm6
+vpunpckhqdq %xmm15, %xmm14, %xmm7
+vpxor 32(%rsi), %xmm0, %xmm0 # second halves (offset 32, stride 64)
+vpxor 48(%rsi), %xmm1, %xmm1
+vpxor 96(%rsi), %xmm2, %xmm2
+vpxor 112(%rsi), %xmm3, %xmm3
+vpxor 160(%rsi), %xmm4, %xmm4
+vpxor 176(%rsi), %xmm5, %xmm5
+vpxor 224(%rsi), %xmm6, %xmm6
+vpxor 240(%rsi), %xmm7, %xmm7
+vmovdqu %xmm0, 32(%rdx)
+vmovdqu %xmm1, 48(%rdx)
+vmovdqu %xmm2, 96(%rdx)
+vmovdqu %xmm3, 112(%rdx)
+vmovdqu %xmm4, 160(%rdx)
+vmovdqu %xmm5, 176(%rdx)
+vmovdqu %xmm6, 224(%rdx)
+vmovdqu %xmm7, 240(%rdx)
+addq $256, %rsi
+jmp chacha_blocks_avx2_mainloop2_cont
+chacha_blocks_avx2_noinput2: # keystream-only variant
+vmovdqu %xmm0, 0(%rdx)
+vmovdqu %xmm1, 16(%rdx)
+vmovdqu %xmm2, 64(%rdx)
+vmovdqu %xmm3, 80(%rdx)
+vmovdqu %xmm4, 128(%rdx)
+vmovdqu %xmm5, 144(%rdx)
+vmovdqu %xmm6, 192(%rdx)
+vmovdqu %xmm7, 208(%rdx)
+vmovdqa 192(%rsp), %xmm0
+vmovdqa 208(%rsp), %xmm1
+vmovdqa 224(%rsp), %xmm2
+vmovdqa 240(%rsp), %xmm3
+vmovdqa 256(%rsp), %xmm4
+vmovdqa 272(%rsp), %xmm5
+vmovdqa 288(%rsp), %xmm6
+vmovdqa 304(%rsp), %xmm7
+vpbroadcastd 32(%rsp), %xmm8
+vpbroadcastd 4+32(%rsp), %xmm9
+vpbroadcastd 8+32(%rsp), %xmm10
+vpbroadcastd 12+32(%rsp), %xmm11
+vmovdqa 128(%rsp), %xmm12
+vmovdqa 160(%rsp), %xmm13
+vpbroadcastd 8+48(%rsp), %xmm14
+vpbroadcastd 12+48(%rsp), %xmm15
+vpaddd %xmm8, %xmm0, %xmm0
+vpaddd %xmm9, %xmm1, %xmm1
+vpaddd %xmm10, %xmm2, %xmm2
+vpaddd %xmm11, %xmm3, %xmm3
+vpaddd %xmm12, %xmm4, %xmm4
+vpaddd %xmm13, %xmm5, %xmm5
+vpaddd %xmm14, %xmm6, %xmm6
+vpaddd %xmm15, %xmm7, %xmm7
+vpunpckldq %xmm1, %xmm0, %xmm8
+vpunpckldq %xmm3, %xmm2, %xmm9
+vpunpckhdq %xmm1, %xmm0, %xmm12
+vpunpckhdq %xmm3, %xmm2, %xmm13
+vpunpckldq %xmm5, %xmm4, %xmm10
+vpunpckldq %xmm7, %xmm6, %xmm11
+vpunpckhdq %xmm5, %xmm4, %xmm14
+vpunpckhdq %xmm7, %xmm6, %xmm15
+vpunpcklqdq %xmm9, %xmm8, %xmm0
+vpunpcklqdq %xmm11, %xmm10, %xmm1
+vpunpckhqdq %xmm9, %xmm8, %xmm2
+vpunpckhqdq %xmm11, %xmm10, %xmm3
+vpunpcklqdq %xmm13, %xmm12, %xmm4
+vpunpcklqdq %xmm15, %xmm14, %xmm5
+vpunpckhqdq %xmm13, %xmm12, %xmm6
+vpunpckhqdq %xmm15, %xmm14, %xmm7
+vmovdqu %xmm0, 32(%rdx)
+vmovdqu %xmm1, 48(%rdx)
+vmovdqu %xmm2, 96(%rdx)
+vmovdqu %xmm3, 112(%rdx)
+vmovdqu %xmm4, 160(%rdx)
+vmovdqu %xmm5, 176(%rdx)
+vmovdqu %xmm6, 224(%rdx)
+vmovdqu %xmm7, 240(%rdx)
+chacha_blocks_avx2_mainloop2_cont:
+addq $256, %rdx
+subq $256, %rcx
+cmp $256, %rcx
+jae chacha_blocks_avx2_atleast256
+chacha_blocks_avx2_below256_fixup: # reload masks and working state for the 1-block path
+vmovdqa 448(%rsp), %xmm6 # rot16 mask
+vmovdqa 480(%rsp), %xmm7 # rot8 mask
+vmovdqa 0(%rsp), %xmm8 # xmm8..11 = current 64-byte state
+vmovdqa 16(%rsp), %xmm9
+vmovdqa 32(%rsp), %xmm10
+vmovdqa 48(%rsp), %xmm11
+movq $1, %r9
+chacha_blocks_avx2_below256: # one 64-byte block per pass
+vmovq %r9, %xmm5 # xmm5 = counter increment (1)
+andq %rcx, %rcx
+jz chacha_blocks_avx2_done
+cmpq $64, %rcx
+jae chacha_blocks_avx2_above63
+movq %rdx, %r9 # partial block: remember real out ptr, stage via stack
+andq %rsi, %rsi
+jz chacha_blocks_avx2_noinput3
+movq %rcx, %r10
+movq %rsp, %rdx
+addq %r10, %rsi # copy the rcx-byte tail into the stack buffer
+addq %r10, %rdx
+negq %r10
+chacha_blocks_avx2_copyinput:
+movb (%rsi, %r10), %al
+movb %al, (%rdx, %r10)
+incq %r10
+jnz chacha_blocks_avx2_copyinput
+movq %rsp, %rsi
+chacha_blocks_avx2_noinput3:
+movq %rsp, %rdx
+chacha_blocks_avx2_above63:
+vmovdqa %xmm8, %xmm0 # working copy of the state rows
+vmovdqa %xmm9, %xmm1
+vmovdqa %xmm10, %xmm2
+vmovdqa %xmm11, %xmm3
+movq 64(%rsp), %rax # rax = rounds
+chacha_blocks_avx2_mainloop3: # two rounds per pass, whole rows as quarterround lanes
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3 # rotl16
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4 # rotl12
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm7, %xmm3, %xmm3 # rotl8
+vpshufd $0x93, %xmm0, %xmm0 # rotate rows to diagonal alignment
+vpaddd %xmm2, %xmm3, %xmm2
+vpshufd $0x4e, %xmm3, %xmm3
+vpxor %xmm1, %xmm2, %xmm1
+vpshufd $0x39, %xmm2, %xmm2
+vpslld $7, %xmm1, %xmm4 # rotl7
+vpsrld $25, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm7, %xmm3, %xmm3
+vpshufd $0x39, %xmm0, %xmm0 # un-rotate rows back to column alignment
+vpaddd %xmm2, %xmm3, %xmm2
+vpshufd $0x4e, %xmm3, %xmm3
+vpxor %xmm1, %xmm2, %xmm1
+vpshufd $0x93, %xmm2, %xmm2
+vpslld $7, %xmm1, %xmm4
+vpsrld $25, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+subq $2, %rax
+jnz chacha_blocks_avx2_mainloop3
+vpaddd %xmm0, %xmm8, %xmm0 # add initial state
+vpaddd %xmm1, %xmm9, %xmm1
+vpaddd %xmm2, %xmm10, %xmm2
+vpaddd %xmm3, %xmm11, %xmm3
+andq %rsi, %rsi
+jz chacha_blocks_avx2_noinput4
+vpxor 0(%rsi), %xmm0, %xmm0
+vpxor 16(%rsi), %xmm1, %xmm1
+vpxor 32(%rsi), %xmm2, %xmm2
+vpxor 48(%rsi), %xmm3, %xmm3
+addq $64, %rsi
+chacha_blocks_avx2_noinput4:
+vmovdqu %xmm0, 0(%rdx)
+vmovdqu %xmm1, 16(%rdx)
+vmovdqu %xmm2, 32(%rdx)
+vmovdqu %xmm3, 48(%rdx)
+vpaddq %xmm11, %xmm5, %xmm11 # bump the 64-bit block counter
+cmpq $64, %rcx
+jbe chacha_blocks_avx2_mainloop3_finishup
+addq $64, %rdx
+subq $64, %rcx
+jmp chacha_blocks_avx2_below256
+chacha_blocks_avx2_mainloop3_finishup: # partial block: copy rcx bytes from stack to real out (r9)
+cmpq $64, %rcx
+je chacha_blocks_avx2_done
+addq %rcx, %r9
+addq %rcx, %rdx
+negq %rcx
+chacha_blocks_avx2_copyoutput:
+movb (%rdx, %rcx), %al
+movb %al, (%r9, %rcx)
+incq %rcx
+jnz chacha_blocks_avx2_copyoutput
+chacha_blocks_avx2_done:
+vmovdqu %xmm11, 32(%rdi) # write updated counter+iv back to caller state
+movq %rbp, %rsp
+popq %r14
+popq %r13
+popq %r12
+popq %rbp
+popq %rbx
+vzeroupper # avoid AVX->SSE transition penalties for the caller
+ret
+FN_END chacha_blocks_avx2
+
+
+GLOBAL_HIDDEN_FN hchacha_avx2
+hchacha_avx2_local: # rdi=key(32 bytes), rsi=iv(16 bytes), rdx=out(32 bytes), rcx=rounds
+LOAD_VAR_PIC chacha_constants, %rax
+vmovdqa 0(%rax), %xmm0 # row 0 = sigma constant (presumably; defined in constants.S)
+vmovdqa 16(%rax), %xmm6 # byte-shuffle mask for rotl16
+vmovdqa 32(%rax), %xmm5 # byte-shuffle mask for rotl8
+vmovdqu 0(%rdi), %xmm1 # rows 1/2 = key
+vmovdqu 16(%rdi), %xmm2
+vmovdqu 0(%rsi), %xmm3 # row 3 = 16-byte input/iv
+hhacha_mainloop_avx2: # NOTE(review): 'hhacha' label is misspelled upstream; definition and jump agree, harmless
+vpaddd %xmm0, %xmm1, %xmm0 # column round: a+=b; d^=a; d=rotl16(d)
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2 # c+=d; b^=c; b=rotl12(b)
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0 # a+=b; d^=a; d=rotl8(d)
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm5, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2 # c+=d; b^=c; b=rotl7(b)
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $7, %xmm1, %xmm4
+vpsrld $25, %xmm1, %xmm1
+vpshufd $0x93, %xmm0, %xmm0 # rotate rows for the diagonal round
+vpxor %xmm1, %xmm4, %xmm1
+vpshufd $0x4e, %xmm3, %xmm3
+vpaddd %xmm0, %xmm1, %xmm0 # diagonal round, same quarterround sequence
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpshufd $0x39, %xmm2, %xmm2
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm5, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpshufd $0x39, %xmm0, %xmm0 # un-rotate rows back to column order
+vpslld $7, %xmm1, %xmm4
+vpshufd $0x4e, %xmm3, %xmm3
+vpsrld $25, %xmm1, %xmm1
+vpshufd $0x93, %xmm2, %xmm2
+vpxor %xmm1, %xmm4, %xmm1
+subl $2, %ecx # two rounds per pass
+jne hhacha_mainloop_avx2
+vmovdqu %xmm0, (%rdx) # output rows 0 and 3 only (no feed-forward add), per HChaCha
+vmovdqu %xmm3, 16(%rdx)
+ret
+FN_END hchacha_avx2
+
+GLOBAL_HIDDEN_FN_EXT chacha_avx2, 6, 16
+pushq %rbp
+movq %rsp, %rbp
+subq $64, %rsp # reserve a chacha_state_internal on the stack
+andq $~63, %rsp # 64-byte-align for the vmovdqa stores below
+vmovdqu 0(%rdi), %xmm0 # args per chacha() prototype in chacha.c: rdi=key, rsi=iv, rdx=in, rcx=out, r8=inlen, r9=rounds
+vmovdqu 16(%rdi), %xmm1
+vmovdqa %xmm0, 0(%rsp) # state[0..31]  = 32-byte key
+vmovdqa %xmm1, 16(%rsp)
+xorq %rdi, %rdi
+movq %rdi, 32(%rsp) # state[32..39] = block counter, starts at 0
+movq 0(%rsi), %rsi
+movq %rsi, 40(%rsp) # state[40..47] = 8-byte IV
+movq %r9, 48(%rsp) # state[48]     = round count
+movq %rsp, %rdi # shuffle into chacha_blocks(state, in, out, inlen) order
+movq %rdx, %rsi
+movq %rcx, %rdx
+movq %r8, %rcx
+call chacha_blocks_avx2_local
+vpxor %xmm0, %xmm0, %xmm0 # wipe key material from the stack before returning
+vmovdqa %xmm0, 0(%rsp)
+vmovdqa %xmm0, 16(%rsp)
+vmovdqa %xmm0, 32(%rsp)
+movq %rbp, %rsp
+popq %rbp
+ret
+FN_END chacha_avx2
+
+GLOBAL_HIDDEN_FN_EXT xchacha_avx2, 6, 16
+pushq %rbp
+pushq %rbx
+movq %rsp, %rbp
+subq $64, %rsp # scratch chacha_state_internal, 64-byte aligned
+andq $~63, %rsp
+movq %rsp, %rbx # rbx = &state (survives the two calls below)
+xorq %rax, %rax
+movq %rax, 32(%rbx) # counter = 0
+movq 16(%rsi), %rax # args per xchacha() prototype in chacha.c: rsi = 24-byte IV;
+movq %rax, 40(%rbx) # IV bytes 16..23 become the inner chacha IV
+movq %r9, 48(%rbx) # rounds
+pushq %rdx # save in/out/inlen across the hchacha call
+pushq %rcx
+pushq %r8
+movq %rbx, %rdx # hchacha_avx2(key=rdi, iv=rsi[0..15], out=state[0..31], rounds)
+movq %r9, %rcx
+call hchacha_avx2_local
+movq %rbx, %rdi # chacha_blocks_avx2(state, in, out, inlen)
+popq %rcx # rcx = saved r8 (inlen)
+popq %rdx # rdx = saved rcx (out)
+popq %rsi # rsi = saved rdx (in)
+call chacha_blocks_avx2_local
+vpxor %xmm0, %xmm0, %xmm0 # wipe derived key material from the stack
+vmovdqa %xmm0, 0(%rbx)
+vmovdqa %xmm0, 16(%rbx)
+vmovdqa %xmm0, 32(%rbx)
+movq %rbp, %rsp
+popq %rbx
+popq %rbp
+ret
+FN_END xchacha_avx2
diff --git a/src/libcryptobox/chacha20/chacha.c b/src/libcryptobox/chacha20/chacha.c
new file mode 100644
index 0000000..0b471c8
--- /dev/null
+++ b/src/libcryptobox/chacha20/chacha.c
@@ -0,0 +1,262 @@
+/* Copyright (c) 2015, Vsevolod Stakhov
+ * Copyright (c) 2015, Andrew Moon
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+#include "chacha.h"
+#include "platform_config.h"
+
+extern unsigned cpu_config;
+
+typedef struct chacha_impl_t {
+ unsigned long cpu_flags;
+ const char *desc;
+ void (*chacha)(const chacha_key *key, const chacha_iv *iv,
+ const unsigned char *in, unsigned char *out, size_t inlen,
+ size_t rounds);
+ void (*xchacha)(const chacha_key *key, const chacha_iv24 *iv,
+ const unsigned char *in, unsigned char *out, size_t inlen,
+ size_t rounds);
+ void (*chacha_blocks)(chacha_state_internal *state,
+ const unsigned char *in, unsigned char *out, size_t bytes);
+ void (*hchacha)(const unsigned char key[32], const unsigned char iv[16],
+ unsigned char out[32], size_t rounds);
+} chacha_impl_t;
+
+#define CHACHA_DECLARE(ext) \
+ void chacha_##ext(const chacha_key *key, const chacha_iv *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); \
+ void xchacha_##ext(const chacha_key *key, const chacha_iv24 *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); \
+ void chacha_blocks_##ext(chacha_state_internal *state, const unsigned char *in, unsigned char *out, size_t bytes); \
+ void hchacha_##ext(const unsigned char key[32], const unsigned char iv[16], unsigned char out[32], size_t rounds);
+#define CHACHA_IMPL(cpuflags, desc, ext) \
+ { \
+ (cpuflags), desc, chacha_##ext, xchacha_##ext, chacha_blocks_##ext, hchacha_##ext \
+ }
+
+#if defined(HAVE_AVX2) && defined(__x86_64__)
+CHACHA_DECLARE(avx2)
+#define CHACHA_AVX2 CHACHA_IMPL(CPUID_AVX2, "avx2", avx2)
+#endif
+#if defined(HAVE_AVX) && defined(__x86_64__)
+CHACHA_DECLARE(avx)
+#define CHACHA_AVX CHACHA_IMPL(CPUID_AVX, "avx", avx)
+#endif
+#if defined(HAVE_SSE2) && defined(__x86_64__)
+CHACHA_DECLARE(sse2)
+#define CHACHA_SSE2 CHACHA_IMPL(CPUID_SSE2, "sse2", sse2)
+#endif
+
+CHACHA_DECLARE(ref)
+#define CHACHA_GENERIC CHACHA_IMPL(0, "generic", ref)
+
+static const chacha_impl_t chacha_list[] = {
+ CHACHA_GENERIC,
+#if defined(CHACHA_AVX2) && defined(__x86_64__)
+ CHACHA_AVX2,
+#endif
+#if defined(CHACHA_AVX) && defined(__x86_64__)
+ CHACHA_AVX,
+#endif
+#if defined(CHACHA_SSE2) && defined(__x86_64__)
+ CHACHA_SSE2
+#endif
+};
+
+static const chacha_impl_t *chacha_impl = &chacha_list[0];
+
+static int
+chacha_is_aligned(const void *p)
+{
+ return ((size_t) p & (sizeof(size_t) - 1)) == 0;
+}
+
+const char *
+chacha_load(void)
+{
+ guint i;
+
+ if (cpu_config != 0) {
+ for (i = 0; i < G_N_ELEMENTS(chacha_list); i++) {
+ if (chacha_list[i].cpu_flags & cpu_config) {
+ chacha_impl = &chacha_list[i];
+ break;
+ }
+ }
+ }
+
+ return chacha_impl->desc;
+}
+
+void chacha_init(chacha_state *S, const chacha_key *key,
+ const chacha_iv *iv, size_t rounds)
+{
+ chacha_state_internal *state = (chacha_state_internal *) S;
+ memcpy(state->s + 0, key, 32);
+ memset(state->s + 32, 0, 8);
+ memcpy(state->s + 40, iv, 8);
+ state->rounds = rounds;
+ state->leftover = 0;
+}
+
+/* processes inlen bytes (can do partial blocks), handling input/output alignment */
+static void
+chacha_consume(chacha_state_internal *state,
+ const unsigned char *in, unsigned char *out, size_t inlen)
+{
+ unsigned char buffer[16 * CHACHA_BLOCKBYTES];
+ int in_aligned, out_aligned;
+
+ /* it's ok to call with 0 bytes */
+ if (!inlen)
+ return;
+
+ /* if everything is aligned, handle directly */
+ in_aligned = chacha_is_aligned(in);
+ out_aligned = chacha_is_aligned(out);
+ if (in_aligned && out_aligned) {
+ chacha_impl->chacha_blocks(state, in, out, inlen);
+ return;
+ }
+
+ /* copy the unaligned data to an aligned buffer and process in chunks */
+ while (inlen) {
+ const size_t bytes = (inlen > sizeof(buffer)) ? sizeof(buffer) : inlen;
+ const unsigned char *src = in;
+ unsigned char *dst = (out_aligned) ? out : buffer;
+ if (!in_aligned) {
+ memcpy(buffer, in, bytes);
+ src = buffer;
+ }
+ chacha_impl->chacha_blocks(state, src, dst, bytes);
+ if (!out_aligned)
+ memcpy(out, buffer, bytes);
+ if (in)
+ in += bytes;
+ out += bytes;
+ inlen -= bytes;
+ }
+}
+
+/* hchacha */
+void hchacha(const unsigned char key[32],
+ const unsigned char iv[16], unsigned char out[32], size_t rounds)
+{
+ chacha_impl->hchacha(key, iv, out, rounds);
+}
+
+/* update, returns number of bytes written to out */
+size_t
+chacha_update(chacha_state *S, const unsigned char *in, unsigned char *out,
+ size_t inlen)
+{
+ chacha_state_internal *state = (chacha_state_internal *) S;
+ unsigned char *out_start = out;
+ size_t bytes;
+
+ /* enough for at least one block? */
+ while ((state->leftover + inlen) >= CHACHA_BLOCKBYTES) {
+ /* handle the previous data */
+ if (state->leftover) {
+ bytes = (CHACHA_BLOCKBYTES - state->leftover);
+ if (in) {
+ memcpy(state->buffer + state->leftover, in, bytes);
+ in += bytes;
+ }
+ chacha_consume(state, (in) ? state->buffer : NULL, out,
+ CHACHA_BLOCKBYTES);
+ inlen -= bytes;
+ out += CHACHA_BLOCKBYTES;
+ state->leftover = 0;
+ }
+
+ /* handle the direct data */
+ bytes = (inlen & ~(CHACHA_BLOCKBYTES - 1));
+ if (bytes) {
+ chacha_consume(state, in, out, bytes);
+ inlen -= bytes;
+ if (in)
+ in += bytes;
+ out += bytes;
+ }
+ }
+
+ /* handle leftover data */
+ if (inlen) {
+ if (in)
+ memcpy(state->buffer + state->leftover, in, inlen);
+ else
+ memset(state->buffer + state->leftover, 0, inlen);
+ state->leftover += inlen;
+ }
+
+ return out - out_start;
+}
+
+/* finalize, write out any leftover data */
+size_t
+chacha_final(chacha_state *S, unsigned char *out)
+{
+ chacha_state_internal *state = (chacha_state_internal *) S;
+ size_t leftover = state->leftover;
+ if (leftover) {
+ if (chacha_is_aligned(out)) {
+ chacha_impl->chacha_blocks(state, state->buffer, out, leftover);
+ }
+ else {
+ chacha_impl->chacha_blocks(state, state->buffer, state->buffer,
+ leftover);
+ memcpy(out, state->buffer, leftover);
+ }
+ }
+ rspamd_explicit_memzero(S, sizeof(chacha_state));
+ return leftover;
+}
+
+/* one-shot, input/output assumed to be word aligned */
+void chacha(const chacha_key *key, const chacha_iv *iv,
+ const unsigned char *in, unsigned char *out, size_t inlen,
+ size_t rounds)
+{
+ chacha_impl->chacha(key, iv, in, out, inlen, rounds);
+}
+
+/*
+ xchacha, chacha with a 192 bit nonce
+ */
+
+void xchacha_init(chacha_state *S, const chacha_key *key,
+ const chacha_iv24 *iv, size_t rounds)
+{
+ chacha_key subkey;
+ hchacha(key->b, iv->b, subkey.b, rounds);
+ chacha_init(S, &subkey, (chacha_iv *) (iv->b + 16), rounds);
+}
+
+/* one-shot, input/output assumed to be word aligned */
+void xchacha(const chacha_key *key, const chacha_iv24 *iv,
+ const unsigned char *in, unsigned char *out, size_t inlen,
+ size_t rounds)
+{
+ chacha_impl->xchacha(key, iv, in, out, inlen, rounds);
+}
diff --git a/src/libcryptobox/chacha20/chacha.h b/src/libcryptobox/chacha20/chacha.h
new file mode 100644
index 0000000..d05088a
--- /dev/null
+++ b/src/libcryptobox/chacha20/chacha.h
@@ -0,0 +1,87 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Andrew Moon, Vsevolod Stakhov
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+#ifndef CHACHA_H_
+#define CHACHA_H_
+
+
+#define CHACHA_BLOCKBYTES 64
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct chacha_state_internal_t {
+ unsigned char s[48];
+ size_t rounds;
+ size_t leftover;
+ unsigned char buffer[CHACHA_BLOCKBYTES];
+} chacha_state_internal;
+
+typedef struct chacha_state_t {
+ unsigned char opaque[128];
+} chacha_state;
+
+typedef struct chacha_key_t {
+ unsigned char b[32];
+} chacha_key;
+
+typedef struct chacha_iv_t {
+ unsigned char b[8];
+} chacha_iv;
+
+typedef struct chacha_iv24_t {
+ unsigned char b[24];
+} chacha_iv24;
+
+void hchacha(const unsigned char key[32], const unsigned char iv[16],
+ unsigned char out[32], size_t rounds);
+
+void chacha_init(chacha_state *S, const chacha_key *key, const chacha_iv *iv,
+ size_t rounds);
+
+void xchacha_init(chacha_state *S, const chacha_key *key,
+ const chacha_iv24 *iv, size_t rounds);
+
+size_t chacha_update(chacha_state *S, const unsigned char *in,
+ unsigned char *out, size_t inlen);
+
+size_t chacha_final(chacha_state *S, unsigned char *out);
+
+void chacha(const chacha_key *key, const chacha_iv *iv,
+ const unsigned char *in, unsigned char *out, size_t inlen,
+ size_t rounds);
+
+void xchacha(const chacha_key *key, const chacha_iv24 *iv,
+ const unsigned char *in, unsigned char *out, size_t inlen,
+ size_t rounds);
+
+const char *chacha_load(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CHACHA_H_ */
diff --git a/src/libcryptobox/chacha20/constants.S b/src/libcryptobox/chacha20/constants.S
new file mode 100644
index 0000000..ff109a3
--- /dev/null
+++ b/src/libcryptobox/chacha20/constants.S
@@ -0,0 +1,6 @@
+SECTION_RODATA
+.p2align 4,,15
+chacha_constants:
+.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 /* "expand 32-byte k" */
+.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 /* pshufb rotate by 16 */
+.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 /* pshufb rotate by 8 */
diff --git a/src/libcryptobox/chacha20/ref.c b/src/libcryptobox/chacha20/ref.c
new file mode 100644
index 0000000..ee646db
--- /dev/null
+++ b/src/libcryptobox/chacha20/ref.c
@@ -0,0 +1,272 @@
+#include "config.h"
+#include "chacha.h"
+#include "cryptobox.h"
+
+#if defined(HAVE_INT32)
+typedef uint32_t chacha_int32;
+#else
+typedef guint32 chacha_int32;
+#endif
+
+/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */
+static chacha_int32
+U8TO32(const unsigned char *p)
+{
+ return (((chacha_int32) (p[0])) |
+ ((chacha_int32) (p[1]) << 8) |
+ ((chacha_int32) (p[2]) << 16) |
+ ((chacha_int32) (p[3]) << 24));
+}
+
+/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */
+static void
+U32TO8(unsigned char *p, chacha_int32 v)
+{
+ p[0] = (v) &0xff;
+ p[1] = (v >> 8) & 0xff;
+ p[2] = (v >> 16) & 0xff;
+ p[3] = (v >> 24) & 0xff;
+}
+
+/* 32 bit left rotate */
+static chacha_int32
+ROTL32(chacha_int32 x, int k)
+{
+ return ((x << k) | (x >> (32 - k))) & 0xffffffff;
+}
+
+/* "expand 32-byte k", as 4 little endian 32-bit unsigned integers */
+static const chacha_int32 chacha_constants[4] = {
+ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
+
+void chacha_blocks_ref(chacha_state_internal *state, const unsigned char *in, unsigned char *out, size_t bytes)
+{
+ chacha_int32 x[16], j[12];
+ chacha_int32 t;
+ unsigned char *ctarget = out, tmp[64];
+ size_t i, r;
+
+ if (!bytes) return;
+
+ j[0] = U8TO32(state->s + 0);
+ j[1] = U8TO32(state->s + 4);
+ j[2] = U8TO32(state->s + 8);
+ j[3] = U8TO32(state->s + 12);
+ j[4] = U8TO32(state->s + 16);
+ j[5] = U8TO32(state->s + 20);
+ j[6] = U8TO32(state->s + 24);
+ j[7] = U8TO32(state->s + 28);
+ j[8] = U8TO32(state->s + 32);
+ j[9] = U8TO32(state->s + 36);
+ j[10] = U8TO32(state->s + 40);
+ j[11] = U8TO32(state->s + 44);
+
+ r = state->rounds;
+
+ for (;;) {
+ if (bytes < 64) {
+ if (in) {
+ for (i = 0; i < bytes; i++) tmp[i] = in[i];
+ in = tmp;
+ }
+ ctarget = out;
+ out = tmp;
+ }
+
+ x[0] = chacha_constants[0];
+ x[1] = chacha_constants[1];
+ x[2] = chacha_constants[2];
+ x[3] = chacha_constants[3];
+ x[4] = j[0];
+ x[5] = j[1];
+ x[6] = j[2];
+ x[7] = j[3];
+ x[8] = j[4];
+ x[9] = j[5];
+ x[10] = j[6];
+ x[11] = j[7];
+ x[12] = j[8];
+ x[13] = j[9];
+ x[14] = j[10];
+ x[15] = j[11];
+
+#define quarter(a, b, c, d) \
+ a += b; \
+ t = d ^ a; \
+ d = ROTL32(t, 16); \
+ c += d; \
+ t = b ^ c; \
+ b = ROTL32(t, 12); \
+ a += b; \
+ t = d ^ a; \
+ d = ROTL32(t, 8); \
+ c += d; \
+ t = b ^ c; \
+ b = ROTL32(t, 7);
+
+#define doubleround() \
+ quarter(x[0], x[4], x[8], x[12]) \
+ quarter(x[1], x[5], x[9], x[13]) \
+ quarter(x[2], x[6], x[10], x[14]) \
+ quarter(x[3], x[7], x[11], x[15]) \
+ quarter(x[0], x[5], x[10], x[15]) \
+ quarter(x[1], x[6], x[11], x[12]) \
+ quarter(x[2], x[7], x[8], x[13]) \
+ quarter(x[3], x[4], x[9], x[14])
+
+ i = r;
+ do {
+ doubleround()
+ i -= 2;
+ } while (i);
+
+ x[0] += chacha_constants[0];
+ x[1] += chacha_constants[1];
+ x[2] += chacha_constants[2];
+ x[3] += chacha_constants[3];
+ x[4] += j[0];
+ x[5] += j[1];
+ x[6] += j[2];
+ x[7] += j[3];
+ x[8] += j[4];
+ x[9] += j[5];
+ x[10] += j[6];
+ x[11] += j[7];
+ x[12] += j[8];
+ x[13] += j[9];
+ x[14] += j[10];
+ x[15] += j[11];
+
+ if (in) {
+ U32TO8(out + 0, x[0] ^ U8TO32(in + 0));
+ U32TO8(out + 4, x[1] ^ U8TO32(in + 4));
+ U32TO8(out + 8, x[2] ^ U8TO32(in + 8));
+ U32TO8(out + 12, x[3] ^ U8TO32(in + 12));
+ U32TO8(out + 16, x[4] ^ U8TO32(in + 16));
+ U32TO8(out + 20, x[5] ^ U8TO32(in + 20));
+ U32TO8(out + 24, x[6] ^ U8TO32(in + 24));
+ U32TO8(out + 28, x[7] ^ U8TO32(in + 28));
+ U32TO8(out + 32, x[8] ^ U8TO32(in + 32));
+ U32TO8(out + 36, x[9] ^ U8TO32(in + 36));
+ U32TO8(out + 40, x[10] ^ U8TO32(in + 40));
+ U32TO8(out + 44, x[11] ^ U8TO32(in + 44));
+ U32TO8(out + 48, x[12] ^ U8TO32(in + 48));
+ U32TO8(out + 52, x[13] ^ U8TO32(in + 52));
+ U32TO8(out + 56, x[14] ^ U8TO32(in + 56));
+ U32TO8(out + 60, x[15] ^ U8TO32(in + 60));
+ in += 64;
+ }
+ else {
+ U32TO8(out + 0, x[0]);
+ U32TO8(out + 4, x[1]);
+ U32TO8(out + 8, x[2]);
+ U32TO8(out + 12, x[3]);
+ U32TO8(out + 16, x[4]);
+ U32TO8(out + 20, x[5]);
+ U32TO8(out + 24, x[6]);
+ U32TO8(out + 28, x[7]);
+ U32TO8(out + 32, x[8]);
+ U32TO8(out + 36, x[9]);
+ U32TO8(out + 40, x[10]);
+ U32TO8(out + 44, x[11]);
+ U32TO8(out + 48, x[12]);
+ U32TO8(out + 52, x[13]);
+ U32TO8(out + 56, x[14]);
+ U32TO8(out + 60, x[15]);
+ }
+
+ /* increment the 64 bit counter, split in to two 32 bit halves */
+ j[8]++;
+ if (!j[8])
+ j[9]++;
+
+ if (bytes <= 64) {
+ if (bytes < 64)
+ for (i = 0; i < bytes; i++) ctarget[i] = out[i];
+
+ /* store the counter back to the state */
+ U32TO8(state->s + 32, j[8]);
+ U32TO8(state->s + 36, j[9]);
+ goto cleanup;
+ }
+ bytes -= 64;
+ out += 64;
+ }
+
+cleanup:
+ rspamd_explicit_memzero(j, sizeof(j));
+}
+
+void hchacha_ref(const unsigned char key[32], const unsigned char iv[16], unsigned char out[32], size_t rounds)
+{
+ chacha_int32 x[16];
+ chacha_int32 t;
+
+ x[0] = chacha_constants[0];
+ x[1] = chacha_constants[1];
+ x[2] = chacha_constants[2];
+ x[3] = chacha_constants[3];
+ x[4] = U8TO32(key + 0);
+ x[5] = U8TO32(key + 4);
+ x[6] = U8TO32(key + 8);
+ x[7] = U8TO32(key + 12);
+ x[8] = U8TO32(key + 16);
+ x[9] = U8TO32(key + 20);
+ x[10] = U8TO32(key + 24);
+ x[11] = U8TO32(key + 28);
+ x[12] = U8TO32(iv + 0);
+ x[13] = U8TO32(iv + 4);
+ x[14] = U8TO32(iv + 8);
+ x[15] = U8TO32(iv + 12);
+
+ do {
+ doubleround()
+ rounds -= 2;
+ } while (rounds);
+
+ /* indices for the chacha constant */
+ U32TO8(out + 0, x[0]);
+ U32TO8(out + 4, x[1]);
+ U32TO8(out + 8, x[2]);
+ U32TO8(out + 12, x[3]);
+
+ /* indices for the iv */
+ U32TO8(out + 16, x[12]);
+ U32TO8(out + 20, x[13]);
+ U32TO8(out + 24, x[14]);
+ U32TO8(out + 28, x[15]);
+}
+
+void chacha_clear_state_ref(chacha_state_internal *state)
+{
+ rspamd_explicit_memzero(state, 48);
+}
+
+void chacha_ref(const chacha_key *key, const chacha_iv *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds)
+{
+ chacha_state_internal state;
+ size_t i;
+ for (i = 0; i < 32; i++)
+ state.s[i + 0] = key->b[i];
+ for (i = 0; i < 8; i++)
+ state.s[i + 32] = 0;
+ for (i = 0; i < 8; i++)
+ state.s[i + 40] = iv->b[i];
+ state.rounds = rounds;
+ chacha_blocks_ref(&state, in, out, inlen);
+ chacha_clear_state_ref(&state);
+}
+
+void xchacha_ref(const chacha_key *key, const chacha_iv24 *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds)
+{
+ chacha_state_internal state;
+ size_t i;
+ hchacha_ref(key->b, iv->b, &state.s[0], rounds);
+ for (i = 0; i < 8; i++)
+ state.s[i + 32] = 0;
+ for (i = 0; i < 8; i++)
+ state.s[i + 40] = iv->b[i + 16];
+ state.rounds = rounds;
+ chacha_blocks_ref(&state, in, out, inlen);
+ chacha_clear_state_ref(&state);
+}
diff --git a/src/libcryptobox/chacha20/sse2.S b/src/libcryptobox/chacha20/sse2.S
new file mode 100644
index 0000000..a91d095
--- /dev/null
+++ b/src/libcryptobox/chacha20/sse2.S
@@ -0,0 +1,734 @@
+#include "../macro.S"
+#include "constants.S"
+SECTION_TEXT
+
+GLOBAL_HIDDEN_FN chacha_blocks_sse2
+chacha_blocks_sse2_local:
+pushq %rbx
+pushq %rbp
+movq %rsp, %rbp
+andq $~63, %rsp
+subq $512, %rsp
+movq $0x3320646e61707865, %rax
+movq $0x6b20657479622d32, %r8
+movd %rax, %xmm8
+movd %r8, %xmm14
+punpcklqdq %xmm14, %xmm8
+movdqu 0(%rdi), %xmm9
+movdqu 16(%rdi), %xmm10
+movdqu 32(%rdi), %xmm11
+movq 48(%rdi), %rax
+movq $1, %r9
+movdqa %xmm8, 0(%rsp)
+movdqa %xmm9, 16(%rsp)
+movdqa %xmm10, 32(%rsp)
+movdqa %xmm11, 48(%rsp)
+movq %rax, 64(%rsp)
+cmpq $256, %rcx
+jb chacha_blocks_sse2_below256
+pshufd $0x00, %xmm8, %xmm0
+pshufd $0x55, %xmm8, %xmm1
+pshufd $0xaa, %xmm8, %xmm2
+pshufd $0xff, %xmm8, %xmm3
+movdqa %xmm0, 128(%rsp)
+movdqa %xmm1, 144(%rsp)
+movdqa %xmm2, 160(%rsp)
+movdqa %xmm3, 176(%rsp)
+pshufd $0x00, %xmm9, %xmm0
+pshufd $0x55, %xmm9, %xmm1
+pshufd $0xaa, %xmm9, %xmm2
+pshufd $0xff, %xmm9, %xmm3
+movdqa %xmm0, 192(%rsp)
+movdqa %xmm1, 208(%rsp)
+movdqa %xmm2, 224(%rsp)
+movdqa %xmm3, 240(%rsp)
+pshufd $0x00, %xmm10, %xmm0
+pshufd $0x55, %xmm10, %xmm1
+pshufd $0xaa, %xmm10, %xmm2
+pshufd $0xff, %xmm10, %xmm3
+movdqa %xmm0, 256(%rsp)
+movdqa %xmm1, 272(%rsp)
+movdqa %xmm2, 288(%rsp)
+movdqa %xmm3, 304(%rsp)
+pshufd $0xaa, %xmm11, %xmm0
+pshufd $0xff, %xmm11, %xmm1
+movdqa %xmm0, 352(%rsp)
+movdqa %xmm1, 368(%rsp)
+jmp chacha_blocks_sse2_atleast256
+.p2align 6,,63
+chacha_blocks_sse2_atleast256:
+movq 48(%rsp), %rax
+leaq 1(%rax), %r8
+leaq 2(%rax), %r9
+leaq 3(%rax), %r10
+leaq 4(%rax), %rbx
+movl %eax, 320(%rsp)
+movl %r8d, 4+320(%rsp)
+movl %r9d, 8+320(%rsp)
+movl %r10d, 12+320(%rsp)
+shrq $32, %rax
+shrq $32, %r8
+shrq $32, %r9
+shrq $32, %r10
+movl %eax, 336(%rsp)
+movl %r8d, 4+336(%rsp)
+movl %r9d, 8+336(%rsp)
+movl %r10d, 12+336(%rsp)
+movq %rbx, 48(%rsp)
+movq 64(%rsp), %rax
+movdqa 128(%rsp), %xmm0
+movdqa 144(%rsp), %xmm1
+movdqa 160(%rsp), %xmm2
+movdqa 176(%rsp), %xmm3
+movdqa 192(%rsp), %xmm4
+movdqa 208(%rsp), %xmm5
+movdqa 224(%rsp), %xmm6
+movdqa 240(%rsp), %xmm7
+movdqa 256(%rsp), %xmm8
+movdqa 272(%rsp), %xmm9
+movdqa 288(%rsp), %xmm10
+movdqa 304(%rsp), %xmm11
+movdqa 320(%rsp), %xmm12
+movdqa 336(%rsp), %xmm13
+movdqa 352(%rsp), %xmm14
+movdqa 368(%rsp), %xmm15
+chacha_blocks_sse2_mainloop1:
+paddd %xmm4, %xmm0
+paddd %xmm5, %xmm1
+pxor %xmm0, %xmm12
+pxor %xmm1, %xmm13
+paddd %xmm6, %xmm2
+paddd %xmm7, %xmm3
+movdqa %xmm6, 96(%rsp)
+pxor %xmm2, %xmm14
+pxor %xmm3, %xmm15
+pshuflw $0xb1,%xmm12,%xmm12
+pshufhw $0xb1,%xmm12,%xmm12
+pshuflw $0xb1,%xmm13,%xmm13
+pshufhw $0xb1,%xmm13,%xmm13
+pshuflw $0xb1,%xmm14,%xmm14
+pshufhw $0xb1,%xmm14,%xmm14
+pshuflw $0xb1,%xmm15,%xmm15
+pshufhw $0xb1,%xmm15,%xmm15
+paddd %xmm12, %xmm8
+paddd %xmm13, %xmm9
+paddd %xmm14, %xmm10
+paddd %xmm15, %xmm11
+movdqa %xmm12, 112(%rsp)
+pxor %xmm8, %xmm4
+pxor %xmm9, %xmm5
+movdqa 96(%rsp), %xmm6
+movdqa %xmm4, %xmm12
+pslld $ 12, %xmm4
+psrld $20, %xmm12
+pxor %xmm12, %xmm4
+movdqa %xmm5, %xmm12
+pslld $ 12, %xmm5
+psrld $20, %xmm12
+pxor %xmm12, %xmm5
+pxor %xmm10, %xmm6
+pxor %xmm11, %xmm7
+movdqa %xmm6, %xmm12
+pslld $ 12, %xmm6
+psrld $20, %xmm12
+pxor %xmm12, %xmm6
+movdqa %xmm7, %xmm12
+pslld $ 12, %xmm7
+psrld $20, %xmm12
+pxor %xmm12, %xmm7
+movdqa 112(%rsp), %xmm12
+paddd %xmm4, %xmm0
+paddd %xmm5, %xmm1
+pxor %xmm0, %xmm12
+pxor %xmm1, %xmm13
+paddd %xmm6, %xmm2
+paddd %xmm7, %xmm3
+movdqa %xmm6, 96(%rsp)
+pxor %xmm2, %xmm14
+pxor %xmm3, %xmm15
+movdqa %xmm12, %xmm6
+pslld $ 8, %xmm12
+psrld $24, %xmm6
+pxor %xmm6, %xmm12
+movdqa %xmm13, %xmm6
+pslld $ 8, %xmm13
+psrld $24, %xmm6
+pxor %xmm6, %xmm13
+paddd %xmm12, %xmm8
+paddd %xmm13, %xmm9
+movdqa %xmm14, %xmm6
+pslld $ 8, %xmm14
+psrld $24, %xmm6
+pxor %xmm6, %xmm14
+movdqa %xmm15, %xmm6
+pslld $ 8, %xmm15
+psrld $24, %xmm6
+pxor %xmm6, %xmm15
+paddd %xmm14, %xmm10
+paddd %xmm15, %xmm11
+movdqa %xmm12, 112(%rsp)
+pxor %xmm8, %xmm4
+pxor %xmm9, %xmm5
+movdqa 96(%rsp), %xmm6
+movdqa %xmm4, %xmm12
+pslld $ 7, %xmm4
+psrld $25, %xmm12
+pxor %xmm12, %xmm4
+movdqa %xmm5, %xmm12
+pslld $ 7, %xmm5
+psrld $25, %xmm12
+pxor %xmm12, %xmm5
+pxor %xmm10, %xmm6
+pxor %xmm11, %xmm7
+movdqa %xmm6, %xmm12
+pslld $ 7, %xmm6
+psrld $25, %xmm12
+pxor %xmm12, %xmm6
+movdqa %xmm7, %xmm12
+pslld $ 7, %xmm7
+psrld $25, %xmm12
+pxor %xmm12, %xmm7
+movdqa 112(%rsp), %xmm12
+paddd %xmm5, %xmm0
+paddd %xmm6, %xmm1
+pxor %xmm0, %xmm15
+pxor %xmm1, %xmm12
+paddd %xmm7, %xmm2
+paddd %xmm4, %xmm3
+movdqa %xmm7, 96(%rsp)
+pxor %xmm2, %xmm13
+pxor %xmm3, %xmm14
+pshuflw $0xb1,%xmm15,%xmm15
+pshufhw $0xb1,%xmm15,%xmm15
+pshuflw $0xb1,%xmm12,%xmm12
+pshufhw $0xb1,%xmm12,%xmm12
+pshuflw $0xb1,%xmm13,%xmm13
+pshufhw $0xb1,%xmm13,%xmm13
+pshuflw $0xb1,%xmm14,%xmm14
+pshufhw $0xb1,%xmm14,%xmm14
+paddd %xmm15, %xmm10
+paddd %xmm12, %xmm11
+paddd %xmm13, %xmm8
+paddd %xmm14, %xmm9
+movdqa %xmm15, 112(%rsp)
+pxor %xmm10, %xmm5
+pxor %xmm11, %xmm6
+movdqa 96(%rsp), %xmm7
+movdqa %xmm5, %xmm15
+pslld $ 12, %xmm5
+psrld $20, %xmm15
+pxor %xmm15, %xmm5
+movdqa %xmm6, %xmm15
+pslld $ 12, %xmm6
+psrld $20, %xmm15
+pxor %xmm15, %xmm6
+pxor %xmm8, %xmm7
+pxor %xmm9, %xmm4
+movdqa %xmm7, %xmm15
+pslld $ 12, %xmm7
+psrld $20, %xmm15
+pxor %xmm15, %xmm7
+movdqa %xmm4, %xmm15
+pslld $ 12, %xmm4
+psrld $20, %xmm15
+pxor %xmm15, %xmm4
+movdqa 112(%rsp), %xmm15
+paddd %xmm5, %xmm0
+paddd %xmm6, %xmm1
+pxor %xmm0, %xmm15
+pxor %xmm1, %xmm12
+paddd %xmm7, %xmm2
+paddd %xmm4, %xmm3
+movdqa %xmm7, 96(%rsp)
+pxor %xmm2, %xmm13
+pxor %xmm3, %xmm14
+movdqa %xmm15, %xmm7
+pslld $ 8, %xmm15
+psrld $24, %xmm7
+pxor %xmm7, %xmm15
+movdqa %xmm12, %xmm7
+pslld $ 8, %xmm12
+psrld $24, %xmm7
+pxor %xmm7, %xmm12
+paddd %xmm15, %xmm10
+paddd %xmm12, %xmm11
+movdqa %xmm13, %xmm7
+pslld $ 8, %xmm13
+psrld $24, %xmm7
+pxor %xmm7, %xmm13
+movdqa %xmm14, %xmm7
+pslld $ 8, %xmm14
+psrld $24, %xmm7
+pxor %xmm7, %xmm14
+paddd %xmm13, %xmm8
+paddd %xmm14, %xmm9
+movdqa %xmm15, 112(%rsp)
+pxor %xmm10, %xmm5
+pxor %xmm11, %xmm6
+movdqa 96(%rsp), %xmm7
+movdqa %xmm5, %xmm15
+pslld $ 7, %xmm5
+psrld $25, %xmm15
+pxor %xmm15, %xmm5
+movdqa %xmm6, %xmm15
+pslld $ 7, %xmm6
+psrld $25, %xmm15
+pxor %xmm15, %xmm6
+pxor %xmm8, %xmm7
+pxor %xmm9, %xmm4
+movdqa %xmm7, %xmm15
+pslld $ 7, %xmm7
+psrld $25, %xmm15
+pxor %xmm15, %xmm7
+movdqa %xmm4, %xmm15
+pslld $ 7, %xmm4
+psrld $25, %xmm15
+pxor %xmm15, %xmm4
+movdqa 112(%rsp), %xmm15
+subq $2, %rax
+jnz chacha_blocks_sse2_mainloop1
+paddd 128(%rsp), %xmm0
+paddd 144(%rsp), %xmm1
+paddd 160(%rsp), %xmm2
+paddd 176(%rsp), %xmm3
+paddd 192(%rsp), %xmm4
+paddd 208(%rsp), %xmm5
+paddd 224(%rsp), %xmm6
+paddd 240(%rsp), %xmm7
+paddd 256(%rsp), %xmm8
+paddd 272(%rsp), %xmm9
+paddd 288(%rsp), %xmm10
+paddd 304(%rsp), %xmm11
+paddd 320(%rsp), %xmm12
+paddd 336(%rsp), %xmm13
+paddd 352(%rsp), %xmm14
+paddd 368(%rsp), %xmm15
+movdqa %xmm8, 384(%rsp)
+movdqa %xmm9, 400(%rsp)
+movdqa %xmm10, 416(%rsp)
+movdqa %xmm11, 432(%rsp)
+movdqa %xmm12, 448(%rsp)
+movdqa %xmm13, 464(%rsp)
+movdqa %xmm14, 480(%rsp)
+movdqa %xmm15, 496(%rsp)
+movdqa %xmm0, %xmm8
+movdqa %xmm2, %xmm9
+movdqa %xmm4, %xmm10
+movdqa %xmm6, %xmm11
+punpckhdq %xmm1, %xmm0
+punpckhdq %xmm3, %xmm2
+punpckhdq %xmm5, %xmm4
+punpckhdq %xmm7, %xmm6
+punpckldq %xmm1, %xmm8
+punpckldq %xmm3, %xmm9
+punpckldq %xmm5, %xmm10
+punpckldq %xmm7, %xmm11
+movdqa %xmm0, %xmm1
+movdqa %xmm4, %xmm3
+movdqa %xmm8, %xmm5
+movdqa %xmm10, %xmm7
+punpckhqdq %xmm2, %xmm0
+punpckhqdq %xmm6, %xmm4
+punpckhqdq %xmm9, %xmm8
+punpckhqdq %xmm11, %xmm10
+punpcklqdq %xmm2, %xmm1
+punpcklqdq %xmm6, %xmm3
+punpcklqdq %xmm9, %xmm5
+punpcklqdq %xmm11, %xmm7
+andq %rsi, %rsi
+jz chacha_blocks_sse2_noinput1
+movdqu 0(%rsi), %xmm2
+movdqu 16(%rsi), %xmm6
+movdqu 64(%rsi), %xmm9
+movdqu 80(%rsi), %xmm11
+movdqu 128(%rsi), %xmm12
+movdqu 144(%rsi), %xmm13
+movdqu 192(%rsi), %xmm14
+movdqu 208(%rsi), %xmm15
+pxor %xmm2, %xmm5
+pxor %xmm6, %xmm7
+pxor %xmm9, %xmm8
+pxor %xmm11, %xmm10
+pxor %xmm12, %xmm1
+pxor %xmm13, %xmm3
+pxor %xmm14, %xmm0
+pxor %xmm15, %xmm4
+movdqu %xmm5, 0(%rdx)
+movdqu %xmm7, 16(%rdx)
+movdqu %xmm8, 64(%rdx)
+movdqu %xmm10, 80(%rdx)
+movdqu %xmm1, 128(%rdx)
+movdqu %xmm3, 144(%rdx)
+movdqu %xmm0, 192(%rdx)
+movdqu %xmm4, 208(%rdx)
+movdqa 384(%rsp), %xmm0
+movdqa 400(%rsp), %xmm1
+movdqa 416(%rsp), %xmm2
+movdqa 432(%rsp), %xmm3
+movdqa 448(%rsp), %xmm4
+movdqa 464(%rsp), %xmm5
+movdqa 480(%rsp), %xmm6
+movdqa 496(%rsp), %xmm7
+movdqa %xmm0, %xmm8
+movdqa %xmm2, %xmm9
+movdqa %xmm4, %xmm10
+movdqa %xmm6, %xmm11
+punpckldq %xmm1, %xmm8
+punpckldq %xmm3, %xmm9
+punpckhdq %xmm1, %xmm0
+punpckhdq %xmm3, %xmm2
+punpckldq %xmm5, %xmm10
+punpckldq %xmm7, %xmm11
+punpckhdq %xmm5, %xmm4
+punpckhdq %xmm7, %xmm6
+movdqa %xmm8, %xmm1
+movdqa %xmm0, %xmm3
+movdqa %xmm10, %xmm5
+movdqa %xmm4, %xmm7
+punpcklqdq %xmm9, %xmm1
+punpcklqdq %xmm11, %xmm5
+punpckhqdq %xmm9, %xmm8
+punpckhqdq %xmm11, %xmm10
+punpcklqdq %xmm2, %xmm3
+punpcklqdq %xmm6, %xmm7
+punpckhqdq %xmm2, %xmm0
+punpckhqdq %xmm6, %xmm4
+movdqu 32(%rsi), %xmm2
+movdqu 48(%rsi), %xmm6
+movdqu 96(%rsi), %xmm9
+movdqu 112(%rsi), %xmm11
+movdqu 160(%rsi), %xmm12
+movdqu 176(%rsi), %xmm13
+movdqu 224(%rsi), %xmm14
+movdqu 240(%rsi), %xmm15
+pxor %xmm2, %xmm1
+pxor %xmm6, %xmm5
+pxor %xmm9, %xmm8
+pxor %xmm11, %xmm10
+pxor %xmm12, %xmm3
+pxor %xmm13, %xmm7
+pxor %xmm14, %xmm0
+pxor %xmm15, %xmm4
+movdqu %xmm1, 32(%rdx)
+movdqu %xmm5, 48(%rdx)
+movdqu %xmm8, 96(%rdx)
+movdqu %xmm10, 112(%rdx)
+movdqu %xmm3, 160(%rdx)
+movdqu %xmm7, 176(%rdx)
+movdqu %xmm0, 224(%rdx)
+movdqu %xmm4, 240(%rdx)
+addq $256, %rsi
+jmp chacha_blocks_sse2_mainloop_cont
+chacha_blocks_sse2_noinput1:
+movdqu %xmm5, 0(%rdx)
+movdqu %xmm7, 16(%rdx)
+movdqu %xmm8, 64(%rdx)
+movdqu %xmm10, 80(%rdx)
+movdqu %xmm1, 128(%rdx)
+movdqu %xmm3, 144(%rdx)
+movdqu %xmm0, 192(%rdx)
+movdqu %xmm4, 208(%rdx)
+movdqa 384(%rsp), %xmm0
+movdqa 400(%rsp), %xmm1
+movdqa 416(%rsp), %xmm2
+movdqa 432(%rsp), %xmm3
+movdqa 448(%rsp), %xmm4
+movdqa 464(%rsp), %xmm5
+movdqa 480(%rsp), %xmm6
+movdqa 496(%rsp), %xmm7
+movdqa %xmm0, %xmm8
+movdqa %xmm2, %xmm9
+movdqa %xmm4, %xmm10
+movdqa %xmm6, %xmm11
+punpckldq %xmm1, %xmm8
+punpckldq %xmm3, %xmm9
+punpckhdq %xmm1, %xmm0
+punpckhdq %xmm3, %xmm2
+punpckldq %xmm5, %xmm10
+punpckldq %xmm7, %xmm11
+punpckhdq %xmm5, %xmm4
+punpckhdq %xmm7, %xmm6
+movdqa %xmm8, %xmm1
+movdqa %xmm0, %xmm3
+movdqa %xmm10, %xmm5
+movdqa %xmm4, %xmm7
+punpcklqdq %xmm9, %xmm1
+punpcklqdq %xmm11, %xmm5
+punpckhqdq %xmm9, %xmm8
+punpckhqdq %xmm11, %xmm10
+punpcklqdq %xmm2, %xmm3
+punpcklqdq %xmm6, %xmm7
+punpckhqdq %xmm2, %xmm0
+punpckhqdq %xmm6, %xmm4
+movdqu %xmm1, 32(%rdx)
+movdqu %xmm5, 48(%rdx)
+movdqu %xmm8, 96(%rdx)
+movdqu %xmm10, 112(%rdx)
+movdqu %xmm3, 160(%rdx)
+movdqu %xmm7, 176(%rdx)
+movdqu %xmm0, 224(%rdx)
+movdqu %xmm4, 240(%rdx)
+chacha_blocks_sse2_mainloop_cont:
+addq $256, %rdx
+subq $256, %rcx
+cmp $256, %rcx
+jae chacha_blocks_sse2_atleast256
+movdqa 0(%rsp), %xmm8
+movdqa 16(%rsp), %xmm9
+movdqa 32(%rsp), %xmm10
+movdqa 48(%rsp), %xmm11
+movq $1, %r9
+chacha_blocks_sse2_below256:
+movq %r9, %xmm5
+andq %rcx, %rcx
+jz chacha_blocks_sse2_done
+cmpq $64, %rcx
+jae chacha_blocks_sse2_above63
+movq %rdx, %r9
+andq %rsi, %rsi
+jz chacha_blocks_sse2_noinput2
+movq %rcx, %r10
+movq %rsp, %rdx
+addq %r10, %rsi
+addq %r10, %rdx
+negq %r10
+chacha_blocks_sse2_copyinput:
+movb (%rsi, %r10), %al
+movb %al, (%rdx, %r10)
+incq %r10
+jnz chacha_blocks_sse2_copyinput
+movq %rsp, %rsi
+chacha_blocks_sse2_noinput2:
+movq %rsp, %rdx
+chacha_blocks_sse2_above63:
+movdqa %xmm8, %xmm0
+movdqa %xmm9, %xmm1
+movdqa %xmm10, %xmm2
+movdqa %xmm11, %xmm3
+movq 64(%rsp), %rax
+chacha_blocks_sse2_mainloop2:
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+pshuflw $0xb1,%xmm3,%xmm3
+pshufhw $0xb1,%xmm3,%xmm3
+paddd %xmm3, %xmm2
+pxor %xmm2, %xmm1
+movdqa %xmm1,%xmm4
+pslld $12, %xmm1
+psrld $20, %xmm4
+pxor %xmm4, %xmm1
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+movdqa %xmm3,%xmm4
+pslld $8, %xmm3
+psrld $24, %xmm4
+pshufd $0x93,%xmm0,%xmm0
+pxor %xmm4, %xmm3
+paddd %xmm3, %xmm2
+pshufd $0x4e,%xmm3,%xmm3
+pxor %xmm2, %xmm1
+pshufd $0x39,%xmm2,%xmm2
+movdqa %xmm1,%xmm4
+pslld $7, %xmm1
+psrld $25, %xmm4
+pxor %xmm4, %xmm1
+subq $2, %rax
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+pshuflw $0xb1,%xmm3,%xmm3
+pshufhw $0xb1,%xmm3,%xmm3
+paddd %xmm3, %xmm2
+pxor %xmm2, %xmm1
+movdqa %xmm1,%xmm4
+pslld $12, %xmm1
+psrld $20, %xmm4
+pxor %xmm4, %xmm1
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+movdqa %xmm3,%xmm4
+pslld $8, %xmm3
+psrld $24, %xmm4
+pshufd $0x39,%xmm0,%xmm0
+pxor %xmm4, %xmm3
+paddd %xmm3, %xmm2
+pshufd $0x4e,%xmm3,%xmm3
+pxor %xmm2, %xmm1
+pshufd $0x93,%xmm2,%xmm2
+movdqa %xmm1,%xmm4
+pslld $7, %xmm1
+psrld $25, %xmm4
+pxor %xmm4, %xmm1
+jnz chacha_blocks_sse2_mainloop2
+paddd %xmm8, %xmm0
+paddd %xmm9, %xmm1
+paddd %xmm10, %xmm2
+paddd %xmm11, %xmm3
+andq %rsi, %rsi
+jz chacha_blocks_sse2_noinput3
+movdqu 0(%rsi), %xmm12
+movdqu 16(%rsi), %xmm13
+movdqu 32(%rsi), %xmm14
+movdqu 48(%rsi), %xmm15
+pxor %xmm12, %xmm0
+pxor %xmm13, %xmm1
+pxor %xmm14, %xmm2
+pxor %xmm15, %xmm3
+addq $64, %rsi
+chacha_blocks_sse2_noinput3:
+movdqu %xmm0, 0(%rdx)
+movdqu %xmm1, 16(%rdx)
+movdqu %xmm2, 32(%rdx)
+movdqu %xmm3, 48(%rdx)
+paddq %xmm5, %xmm11
+cmpq $64, %rcx
+jbe chacha_blocks_sse2_mainloop2_finishup
+addq $64, %rdx
+subq $64, %rcx
+jmp chacha_blocks_sse2_below256
+chacha_blocks_sse2_mainloop2_finishup:
+cmpq $64, %rcx
+je chacha_blocks_sse2_done
+addq %rcx, %r9
+addq %rcx, %rdx
+negq %rcx
+chacha_blocks_sse2_copyoutput:
+movb (%rdx, %rcx), %al
+movb %al, (%r9, %rcx)
+incq %rcx
+jnz chacha_blocks_sse2_copyoutput
+chacha_blocks_sse2_done:
+movdqu %xmm11, 32(%rdi)
+movq %rbp, %rsp
+popq %rbp
+popq %rbx
+ret
+FN_END chacha_blocks_sse2
+
+GLOBAL_HIDDEN_FN hchacha_sse2
+hchacha_sse2_local:
+movq $0x3320646e61707865, %rax
+movq $0x6b20657479622d32, %r8
+movd %rax, %xmm0
+movd %r8, %xmm4
+punpcklqdq %xmm4, %xmm0
+movdqu 0(%rdi), %xmm1
+movdqu 16(%rdi), %xmm2
+movdqu 0(%rsi), %xmm3
+hchacha_sse2_mainloop:
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+pshuflw $0xb1,%xmm3,%xmm3
+pshufhw $0xb1,%xmm3,%xmm3
+paddd %xmm3, %xmm2
+pxor %xmm2, %xmm1
+movdqa %xmm1,%xmm4
+pslld $12, %xmm1
+psrld $20, %xmm4
+pxor %xmm4, %xmm1
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+movdqa %xmm3,%xmm4
+pslld $8, %xmm3
+psrld $24, %xmm4
+pshufd $0x93,%xmm0,%xmm0
+pxor %xmm4, %xmm3
+paddd %xmm3, %xmm2
+pshufd $0x4e,%xmm3,%xmm3
+pxor %xmm2, %xmm1
+pshufd $0x39,%xmm2,%xmm2
+movdqa %xmm1,%xmm4
+pslld $7, %xmm1
+psrld $25, %xmm4
+pxor %xmm4, %xmm1
+subq $2, %rcx
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+pshuflw $0xb1,%xmm3,%xmm3
+pshufhw $0xb1,%xmm3,%xmm3
+paddd %xmm3, %xmm2
+pxor %xmm2, %xmm1
+movdqa %xmm1,%xmm4
+pslld $12, %xmm1
+psrld $20, %xmm4
+pxor %xmm4, %xmm1
+paddd %xmm1, %xmm0
+pxor %xmm0, %xmm3
+movdqa %xmm3,%xmm4
+pslld $8, %xmm3
+psrld $24, %xmm4
+pshufd $0x39,%xmm0,%xmm0
+pxor %xmm4, %xmm3
+paddd %xmm3, %xmm2
+pshufd $0x4e,%xmm3,%xmm3
+pxor %xmm2, %xmm1
+pshufd $0x93,%xmm2,%xmm2
+movdqa %xmm1,%xmm4
+pslld $7, %xmm1
+psrld $25, %xmm4
+pxor %xmm4, %xmm1
+ja hchacha_sse2_mainloop
+movdqu %xmm0, 0(%rdx)
+movdqu %xmm3, 16(%rdx)
+ret
+FN_END hchacha_sse2
+
+GLOBAL_HIDDEN_FN_EXT chacha_sse2, 6, 16
+pushq %rbp
+movq %rsp, %rbp
+subq $64, %rsp
+andq $~63, %rsp
+movdqu 0(%rdi), %xmm0
+movdqu 16(%rdi), %xmm1
+movdqa %xmm0, 0(%rsp)
+movdqa %xmm1, 16(%rsp)
+xorq %rdi, %rdi
+movq %rdi, 32(%rsp)
+movq 0(%rsi), %rsi
+movq %rsi, 40(%rsp)
+movq %r9, 48(%rsp)
+movq %rsp, %rdi
+movq %rdx, %rsi
+movq %rcx, %rdx
+movq %r8, %rcx
+call chacha_blocks_sse2_local
+pxor %xmm0, %xmm0
+movdqa %xmm0, 0(%rsp)
+movdqa %xmm0, 16(%rsp)
+movdqa %xmm0, 32(%rsp)
+movq %rbp, %rsp
+popq %rbp
+ret
+FN_END chacha_sse2
+
+GLOBAL_HIDDEN_FN_EXT xchacha_sse2, 6, 16
+pushq %rbp
+pushq %rbx
+movq %rsp, %rbp
+subq $64, %rsp
+andq $~63, %rsp
+movq %rsp, %rbx
+xorq %rax, %rax
+movq %rax, 32(%rbx)
+movq 16(%rsi), %rax
+movq %rax, 40(%rbx)
+movq %r9, 48(%rbx)
+pushq %rdx
+pushq %rcx
+pushq %r8
+movq %rbx, %rdx
+movq %r9, %rcx
+call hchacha_sse2_local
+movq %rbx, %rdi
+popq %rcx
+popq %rdx
+popq %rsi
+call chacha_blocks_sse2_local
+pxor %xmm0, %xmm0
+movdqa %xmm0, 0(%rbx)
+movdqa %xmm0, 16(%rbx)
+movdqa %xmm0, 32(%rbx)
+movq %rbp, %rsp
+popq %rbx
+popq %rbp
+ret
+FN_END xchacha_sse2
diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
new file mode 100644
index 0000000..e118c4a
--- /dev/null
+++ b/src/libcryptobox/cryptobox.c
@@ -0,0 +1,1778 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Workaround for memset_s */
+#ifdef __APPLE__
+#define __STDC_WANT_LIB_EXT1__ 1
+#include <string.h>
+#endif
+
+#include "config.h"
+#include "cryptobox.h"
+#include "platform_config.h"
+#include "chacha20/chacha.h"
+#include "catena/catena.h"
+#include "base64/base64.h"
+#include "ottery.h"
+#include "printf.h"
+#define XXH_INLINE_ALL
+#define XXH_PRIVATE_API
+#include "xxhash.h"
+#define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */
+#include "../../contrib/mumhash/mum.h"
+#include "../../contrib/t1ha/t1ha.h"
+#ifdef HAVE_CPUID_H
+#include <cpuid.h>
+#endif
+#ifdef HAVE_OPENSSL
+#include <openssl/opensslv.h>
+/* Openssl >= 1.0.1d is required for GCM verification */
+#if OPENSSL_VERSION_NUMBER >= 0x1000104fL
+#define HAVE_USABLE_OPENSSL 1
+#endif
+#endif
+
+#ifdef HAVE_USABLE_OPENSSL
+#include <openssl/evp.h>
+#include <openssl/ec.h>
+#include <openssl/ecdh.h>
+#include <openssl/ecdsa.h>
+#include <openssl/rand.h>
+#define CRYPTOBOX_CURVE_NID NID_X9_62_prime256v1
+#endif
+
+#include <signal.h>
+#include <setjmp.h>
+#include <stdalign.h>
+
+#include <sodium.h>
+
+/* Bitmask of CPUID_* feature flags detected at startup (see rspamd_cryptobox_init) */
+unsigned cpu_config = 0;
+
+/* Guards against repeated library initialisation */
+static gboolean cryptobox_loaded = FALSE;
+
+/* All-zero 16-byte nonce used as hchacha input when deriving shared keys */
+static const guchar n0[16] = {0};
+
+#define CRYPTOBOX_ALIGNMENT 16
+/* Round pointer p up to the next multiple of alignment a (a must be a power of two) */
+#define cryptobox_align_ptr(p, a) \
+ (void *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1))
+
+/*
+ * Execute the CPUID instruction for leaf `info` and store EAX..EDX in cpu[0..3].
+ * On non-x86 or non-GCC builds cpu[] is zeroed instead.
+ */
+static void
+rspamd_cryptobox_cpuid(gint cpu[4], gint info)
+{
+ guint32 __attribute__((unused)) eax, __attribute__((unused)) ecx = 0, __attribute__((unused)) ebx = 0, __attribute__((unused)) edx = 0;
+
+ eax = info;
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if defined(__i386__) && defined(__PIC__)
+
+ /* in case of PIC under 32-bit EBX cannot be clobbered */
+
+ __asm__ volatile("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi"
+ : "=D"(ebx),
+ "+a"(eax), "+c"(ecx), "=d"(edx));
+#else
+ __asm__ volatile("cpuid"
+ : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx));
+#endif
+
+ cpu[0] = eax;
+ cpu[1] = ebx;
+ cpu[2] = ecx;
+ cpu[3] = edx;
+#else
+ memset(cpu, 0, sizeof(gint) * 4);
+#endif
+}
+
+/* Shared state for the SIGILL-based instruction probe below */
+static sig_atomic_t ok = 0;
+static jmp_buf j;
+
+/*
+ * SIGILL handler: mark the probed instruction as unsupported and jump back
+ * into rspamd_cryptobox_test_instr's setjmp point.
+ */
+__attribute__((noreturn)) static void
+rspamd_cryptobox_ill_handler(int signo)
+{
+ ok = 0;
+ longjmp(j, -1);
+}
+
+/*
+ * Verify that a CPUID-advertised instruction set actually executes by running
+ * one representative instruction under a temporary SIGILL handler.
+ * Returns TRUE if the instruction ran without faulting; FALSE on SIGILL or
+ * when the instruction class is unknown/compiled out.
+ */
+static gboolean
+rspamd_cryptobox_test_instr(gint instr)
+{
+ void (*old_handler)(int);
+ guint32 rd;
+
+#if defined(__GNUC__)
+ ok = 1;
+ old_handler = signal(SIGILL, rspamd_cryptobox_ill_handler);
+
+ /* longjmp target: the handler lands here when the instruction faults */
+ if (setjmp(j) != 0) {
+ signal(SIGILL, old_handler);
+
+ return FALSE;
+ }
+
+ switch (instr) {
+#if defined HAVE_SSE2 && defined(__x86_64__)
+ case CPUID_SSE2:
+ __asm__ volatile("psubb %xmm0, %xmm0");
+ break;
+ case CPUID_RDRAND:
+ /* Use byte code here for compatibility */
+ __asm__ volatile(".byte 0x0f,0xc7,0xf0; setc %1"
+ : "=a"(rd), "=qm"(ok)
+ :
+ : "edx");
+ break;
+#endif
+#ifdef HAVE_SSE3
+ case CPUID_SSE3:
+ __asm__ volatile("movshdup %xmm0, %xmm0");
+ break;
+#endif
+#ifdef HAVE_SSSE3
+ case CPUID_SSSE3:
+ __asm__ volatile("pshufb %xmm0, %xmm0");
+ break;
+#endif
+#ifdef HAVE_SSE41
+ case CPUID_SSE41:
+ __asm__ volatile("pcmpeqq %xmm0, %xmm0");
+ break;
+#endif
+#if defined HAVE_SSE42 && defined(__x86_64__)
+ case CPUID_SSE42:
+ __asm__ volatile("pushq %rax\n"
+ "xorq %rax, %rax\n"
+ "crc32 %rax, %rax\n"
+ "popq %rax");
+ break;
+#endif
+#ifdef HAVE_AVX
+ case CPUID_AVX:
+ __asm__ volatile("vpaddq %xmm0, %xmm0, %xmm0");
+ break;
+#endif
+#ifdef HAVE_AVX2
+ case CPUID_AVX2:
+ __asm__ volatile("vpaddq %ymm0, %ymm0, %ymm0");
+ break;
+#endif
+ default:
+ return FALSE;
+ break;
+ }
+
+ signal(SIGILL, old_handler);
+#endif
+
+ (void) rd; /* Silence warning */
+
+ /* We actually never return here if SIGILL has been caught */
+ return ok == 1;
+}
+
+/*
+ * One-time library initialisation: probe CPU features via CPUID + SIGILL
+ * tests, populate the global cpu_config bitmask, build a human-readable
+ * extensions string, initialise libsodium and pick chacha/base64 backends.
+ * Subsequent calls return the cached context unchanged.
+ */
+struct rspamd_cryptobox_library_ctx *
+rspamd_cryptobox_init(void)
+{
+ gint cpu[4], nid;
+ const guint32 osxsave_mask = (1 << 27);
+ const guint32 fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27));
+ const guint32 avx2_bmi12_mask = (1 << 5) | (1 << 3) | (1 << 8);
+ gulong bit;
+ static struct rspamd_cryptobox_library_ctx *ctx;
+ GString *buf;
+
+ if (cryptobox_loaded) {
+ /* Ignore reload attempts */
+ return ctx;
+ }
+
+ cryptobox_loaded = TRUE;
+ ctx = g_malloc0(sizeof(*ctx));
+
+ /* Leaf 0 returns the highest supported CPUID leaf in EAX */
+ rspamd_cryptobox_cpuid(cpu, 0);
+ nid = cpu[0];
+ rspamd_cryptobox_cpuid(cpu, 1);
+
+ if (nid > 1) {
+ /* Each CPUID-advertised feature is confirmed by actually executing it */
+ if ((cpu[3] & ((guint32) 1 << 26))) {
+ if (rspamd_cryptobox_test_instr(CPUID_SSE2)) {
+ cpu_config |= CPUID_SSE2;
+ }
+ }
+ if ((cpu[2] & ((guint32) 1 << 0))) {
+ if (rspamd_cryptobox_test_instr(CPUID_SSE3)) {
+ cpu_config |= CPUID_SSE3;
+ }
+ }
+ if ((cpu[2] & ((guint32) 1 << 9))) {
+ if (rspamd_cryptobox_test_instr(CPUID_SSSE3)) {
+ cpu_config |= CPUID_SSSE3;
+ }
+ }
+ if ((cpu[2] & ((guint32) 1 << 19))) {
+ if (rspamd_cryptobox_test_instr(CPUID_SSE41)) {
+ cpu_config |= CPUID_SSE41;
+ }
+ }
+ if ((cpu[2] & ((guint32) 1 << 20))) {
+ if (rspamd_cryptobox_test_instr(CPUID_SSE42)) {
+ cpu_config |= CPUID_SSE42;
+ }
+ }
+ if ((cpu[2] & ((guint32) 1 << 30))) {
+ if (rspamd_cryptobox_test_instr(CPUID_RDRAND)) {
+ cpu_config |= CPUID_RDRAND;
+ }
+ }
+
+ /* OSXSAVE */
+ if ((cpu[2] & osxsave_mask) == osxsave_mask) {
+ if ((cpu[2] & ((guint32) 1 << 28))) {
+ if (rspamd_cryptobox_test_instr(CPUID_AVX)) {
+ cpu_config |= CPUID_AVX;
+ }
+ }
+
+ /* AVX2 lives in leaf 7; require FMA/MOVBE/OSXSAVE from leaf 1 first */
+ if (nid >= 7 &&
+ (cpu[2] & fma_movbe_osxsave_mask) == fma_movbe_osxsave_mask) {
+ rspamd_cryptobox_cpuid(cpu, 7);
+
+ if ((cpu[1] & avx2_bmi12_mask) == avx2_bmi12_mask) {
+ if (rspamd_cryptobox_test_instr(CPUID_AVX2)) {
+ cpu_config |= CPUID_AVX2;
+ }
+ }
+ }
+ }
+ }
+
+ /* Render the detected feature set as "sse2, sse3, ..." */
+ buf = g_string_new("");
+
+ for (bit = 0x1; bit != 0; bit <<= 1) {
+ if (cpu_config & bit) {
+ switch (bit) {
+ case CPUID_SSE2:
+ rspamd_printf_gstring(buf, "sse2, ");
+ break;
+ case CPUID_SSE3:
+ rspamd_printf_gstring(buf, "sse3, ");
+ break;
+ case CPUID_SSSE3:
+ rspamd_printf_gstring(buf, "ssse3, ");
+ break;
+ case CPUID_SSE41:
+ rspamd_printf_gstring(buf, "sse4.1, ");
+ break;
+ case CPUID_SSE42:
+ rspamd_printf_gstring(buf, "sse4.2, ");
+ break;
+ case CPUID_AVX:
+ rspamd_printf_gstring(buf, "avx, ");
+ break;
+ case CPUID_AVX2:
+ rspamd_printf_gstring(buf, "avx2, ");
+ break;
+ case CPUID_RDRAND:
+ rspamd_printf_gstring(buf, "rdrand, ");
+ break;
+ default:
+ break; /* Silence warning */
+ }
+ }
+ }
+
+ if (buf->len > 2) {
+ /* Trim last chars */
+ g_string_erase(buf, buf->len - 2, 2);
+ }
+
+ /* Transfer the string buffer to the context (freed in deinit) */
+ ctx->cpu_extensions = buf->str;
+ g_string_free(buf, FALSE);
+ ctx->cpu_config = cpu_config;
+ g_assert(sodium_init() != -1);
+
+ ctx->chacha20_impl = chacha_load();
+ ctx->base64_impl = base64_load();
+#if defined(HAVE_USABLE_OPENSSL) && (OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER))
+ /* Needed for old openssl api, not sure about LibreSSL */
+ ERR_load_EC_strings();
+ ERR_load_RAND_strings();
+ ERR_load_EVP_strings();
+#endif
+
+ return ctx;
+}
+
+/* Release the context allocated by rspamd_cryptobox_init (NULL-safe) */
+void rspamd_cryptobox_deinit(struct rspamd_cryptobox_library_ctx *ctx)
+{
+ if (ctx) {
+ g_free(ctx->cpu_extensions);
+ g_free(ctx);
+ }
+}
+
+/*
+ * Generate an encryption keypair.
+ * 25519 mode: random secret key with the standard X25519 clamping
+ * (clear low 3 bits, clear top bit, set bit 254), public key via scalarmult.
+ * NIST mode: OpenSSL EC keygen on the P-256 curve, public key serialised
+ * uncompressed, secret key as big-endian BN bytes.
+ */
+void rspamd_cryptobox_keypair(rspamd_pk_t pk, rspamd_sk_t sk,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ ottery_rand_bytes(sk, rspamd_cryptobox_MAX_SKBYTES);
+ /* X25519 scalar clamping */
+ sk[0] &= 248;
+ sk[31] &= 127;
+ sk[31] |= 64;
+
+ crypto_scalarmult_base(pk, sk);
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EC_KEY *ec_sec;
+ const BIGNUM *bn_sec;
+
+ const EC_POINT *ec_pub;
+ gsize len;
+
+ ec_sec = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID);
+ g_assert(ec_sec != NULL);
+ g_assert(EC_KEY_generate_key(ec_sec) != 0);
+
+ bn_sec = EC_KEY_get0_private_key(ec_sec);
+ g_assert(bn_sec != NULL);
+ ec_pub = EC_KEY_get0_public_key(ec_sec);
+ g_assert(ec_pub != NULL);
+#if OPENSSL_VERSION_MAJOR >= 3
+ unsigned char *buf = NULL; /* Thanks openssl for this API (no) */
+ len = EC_POINT_point2buf(EC_KEY_get0_group(ec_sec), ec_pub,
+ POINT_CONVERSION_UNCOMPRESSED, &buf, NULL);
+ g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode));
+ memcpy(pk, buf, len);
+ OPENSSL_free(buf);
+#else
+ BIGNUM *bn_pub;
+ bn_pub = EC_POINT_point2bn(EC_KEY_get0_group(ec_sec),
+ ec_pub, POINT_CONVERSION_UNCOMPRESSED, NULL, NULL);
+ len = BN_num_bytes(bn_pub);
+ g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode));
+ BN_bn2bin(bn_pub, pk);
+ BN_free(bn_pub);
+#endif
+
+ /* NOTE(review): BN_bn2bin drops leading zero bytes, so a secret key
+ * shorter than 32 bytes is written unpadded -- presumably tolerated
+ * by the deserialisation path; confirm against keypair.c */
+ len = BN_num_bytes(bn_sec);
+ g_assert(len <= (gint) sizeof(rspamd_sk_t));
+ BN_bn2bin(bn_sec, sk);
+
+ EC_KEY_free(ec_sec);
+#endif
+ }
+}
+
+/*
+ * Generate a signing keypair: Ed25519 via libsodium in 25519 mode,
+ * otherwise an OpenSSL EC keypair on P-256 (same serialisation as
+ * rspamd_cryptobox_keypair above).
+ */
+void rspamd_cryptobox_keypair_sig(rspamd_sig_pk_t pk, rspamd_sig_sk_t sk,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_sign_keypair(pk, sk);
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EC_KEY *ec_sec;
+ const BIGNUM *bn_sec;
+ const EC_POINT *ec_pub;
+ gsize len;
+
+ ec_sec = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID);
+ g_assert(ec_sec != NULL);
+ g_assert(EC_KEY_generate_key(ec_sec) != 0);
+
+ bn_sec = EC_KEY_get0_private_key(ec_sec);
+ g_assert(bn_sec != NULL);
+ ec_pub = EC_KEY_get0_public_key(ec_sec);
+ g_assert(ec_pub != NULL);
+
+#if OPENSSL_VERSION_MAJOR >= 3
+ unsigned char *buf = NULL; /* Thanks openssl for this API (no) */
+ len = EC_POINT_point2buf(EC_KEY_get0_group(ec_sec), ec_pub,
+ POINT_CONVERSION_UNCOMPRESSED, &buf, NULL);
+ g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode));
+ memcpy(pk, buf, len);
+ OPENSSL_free(buf);
+#else
+ BIGNUM *bn_pub;
+ bn_pub = EC_POINT_point2bn(EC_KEY_get0_group(ec_sec),
+ ec_pub, POINT_CONVERSION_UNCOMPRESSED, NULL, NULL);
+ len = BN_num_bytes(bn_pub);
+ g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode));
+ BN_bn2bin(bn_pub, pk);
+ BN_free(bn_pub);
+#endif
+
+ len = BN_num_bytes(bn_sec);
+ g_assert(len <= (gint) sizeof(rspamd_sk_t));
+ BN_bn2bin(bn_sec, sk);
+ EC_KEY_free(ec_sec);
+#endif
+ }
+}
+
+#if OPENSSL_VERSION_MAJOR >= 3
+/* Compatibility function for OpenSSL 3.0 - thanks for breaking all API one more time */
+/*
+ * Reimplementation of the removed EC_POINT_bn2point: serialise `bn` to a
+ * big-endian octet buffer and parse it as an EC point on `group`.
+ * Returns `point` (or a freshly allocated point when `point` is NULL),
+ * or NULL on any failure; a point allocated here is freed on failure.
+ */
+EC_POINT *ec_point_bn2point_compat(const EC_GROUP *group,
+ const BIGNUM *bn, EC_POINT *point, BN_CTX *ctx)
+{
+ size_t buf_len = 0;
+ unsigned char *buf;
+ EC_POINT *ret;
+
+ /* BN_num_bytes(0) == 0, but oct2point needs at least one byte */
+ if ((buf_len = BN_num_bytes(bn)) == 0)
+ buf_len = 1;
+ if ((buf = OPENSSL_malloc(buf_len)) == NULL) {
+ return NULL;
+ }
+
+ if (!BN_bn2binpad(bn, buf, buf_len)) {
+ OPENSSL_free(buf);
+ return NULL;
+ }
+
+ if (point == NULL) {
+ if ((ret = EC_POINT_new(group)) == NULL) {
+ OPENSSL_free(buf);
+ return NULL;
+ }
+ }
+ else
+ ret = point;
+
+ if (!EC_POINT_oct2point(group, ret, buf, buf_len, ctx)) {
+ if (ret != point)
+ EC_POINT_clear_free(ret);
+ OPENSSL_free(buf);
+ return NULL;
+ }
+
+ OPENSSL_free(buf);
+ return ret;
+}
+#else
+#define ec_point_bn2point_compat EC_POINT_bn2point
+#endif
+
+/*
+ * Derive the shared ("beforenm") key `nm` from a peer public key and a local
+ * secret key. Both modes run the raw shared secret through hchacha with the
+ * all-zero nonce n0 as a KDF step.
+ */
+void rspamd_cryptobox_nm(rspamd_nm_t nm,
+ const rspamd_pk_t pk, const rspamd_sk_t sk,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ guchar s[32];
+ guchar e[32];
+
+ /* Clamp a local copy of the secret scalar (X25519) */
+ memcpy(e, sk, 32);
+ e[0] &= 248;
+ e[31] &= 127;
+ e[31] |= 64;
+
+ /* NOTE(review): libsodium's crypto_scalarmult returns 0 on success and
+ * -1 on failure, so `!= -1` means "on success"; on failure nm is left
+ * uninitialised -- callers appear to assume success */
+ if (crypto_scalarmult(s, e, pk) != -1) {
+ hchacha(s, n0, nm, 20);
+ }
+
+ /* NOTE(review): only `e` is wiped here; the shared secret `s` stays
+ * on the stack */
+ rspamd_explicit_memzero(e, 32);
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EC_KEY *lk;
+ EC_POINT *ec_pub;
+ BIGNUM *bn_pub, *bn_sec;
+ gint len;
+ guchar s[32];
+
+ lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID);
+ g_assert(lk != NULL);
+
+ bn_pub = BN_bin2bn(pk, rspamd_cryptobox_pk_bytes(mode), NULL);
+ g_assert(bn_pub != NULL);
+ bn_sec = BN_bin2bn(sk, sizeof(rspamd_sk_t), NULL);
+ g_assert(bn_sec != NULL);
+
+ g_assert(EC_KEY_set_private_key(lk, bn_sec) == 1);
+ ec_pub = ec_point_bn2point_compat(EC_KEY_get0_group(lk), bn_pub, NULL, NULL);
+ g_assert(ec_pub != NULL);
+ /* ECDH with NULL KDF: raw X coordinate truncated to sizeof(s) */
+ len = ECDH_compute_key(s, sizeof(s), ec_pub, lk, NULL);
+ g_assert(len == sizeof(s));
+
+ /* Still do hchacha iteration since we are not using SHA1 KDF */
+ hchacha(s, n0, nm, 20);
+
+ EC_KEY_free(lk);
+ EC_POINT_free(ec_pub);
+ BN_free(bn_sec);
+ BN_free(bn_pub);
+#endif
+ }
+}
+
+/*
+ * Produce a detached signature over m[0..mlen).
+ * 25519 mode: Ed25519 via libsodium.
+ * NIST mode: SHA-512 prehash followed by ECDSA over the digest; the
+ * signature length is reported via *siglen_p when non-NULL.
+ */
+void rspamd_cryptobox_sign(guchar *sig, unsigned long long *siglen_p,
+ const guchar *m, gsize mlen,
+ const rspamd_sk_t sk,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_sign_detached(sig, siglen_p, m, mlen, sk);
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EC_KEY *lk;
+ BIGNUM *bn_sec;
+ EVP_MD_CTX *sha_ctx;
+ unsigned char h[64];
+ guint diglen = rspamd_cryptobox_signature_bytes(mode);
+
+ /* Prehash */
+ sha_ctx = EVP_MD_CTX_create();
+ g_assert(EVP_DigestInit(sha_ctx, EVP_sha512()) == 1);
+ EVP_DigestUpdate(sha_ctx, m, mlen);
+ EVP_DigestFinal(sha_ctx, h, NULL);
+
+ /* Key setup */
+ lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID);
+ g_assert(lk != NULL);
+ bn_sec = BN_bin2bn(sk, sizeof(rspamd_sk_t), NULL);
+ g_assert(bn_sec != NULL);
+ g_assert(EC_KEY_set_private_key(lk, bn_sec) == 1);
+
+ /* ECDSA */
+ g_assert(ECDSA_sign(0, h, sizeof(h), sig, &diglen, lk) == 1);
+ g_assert(diglen <= sizeof(rspamd_signature_t));
+
+ if (siglen_p) {
+ *siglen_p = diglen;
+ }
+
+ EC_KEY_free(lk);
+ EVP_MD_CTX_destroy(sha_ctx);
+ BN_free(bn_sec);
+#endif
+ }
+}
+
+/*
+ * Verify a detached signature; mirrors rspamd_cryptobox_sign.
+ * Returns true only when the signature validates (and, in 25519 mode, when
+ * siglen matches the Ed25519 signature size exactly).
+ */
+bool rspamd_cryptobox_verify(const guchar *sig,
+ gsize siglen,
+ const guchar *m,
+ gsize mlen,
+ const rspamd_pk_t pk,
+ enum rspamd_cryptobox_mode mode)
+{
+ bool ret = false;
+
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ if (siglen == rspamd_cryptobox_signature_bytes(RSPAMD_CRYPTOBOX_MODE_25519)) {
+ ret = (crypto_sign_verify_detached(sig, m, mlen, pk) == 0);
+ }
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EC_KEY *lk;
+ EC_POINT *ec_pub;
+ BIGNUM *bn_pub;
+ EVP_MD_CTX *sha_ctx;
+ unsigned char h[64];
+
+ /* Prehash */
+ sha_ctx = EVP_MD_CTX_create();
+ g_assert(EVP_DigestInit(sha_ctx, EVP_sha512()) == 1);
+ EVP_DigestUpdate(sha_ctx, m, mlen);
+ EVP_DigestFinal(sha_ctx, h, NULL);
+
+ /* Key setup */
+ lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID);
+ g_assert(lk != NULL);
+ bn_pub = BN_bin2bn(pk, rspamd_cryptobox_pk_bytes(mode), NULL);
+ g_assert(bn_pub != NULL);
+ ec_pub = ec_point_bn2point_compat(EC_KEY_get0_group(lk), bn_pub, NULL, NULL);
+ g_assert(ec_pub != NULL);
+ g_assert(EC_KEY_set_public_key(lk, ec_pub) == 1);
+
+ /* ECDSA */
+ ret = ECDSA_verify(0, h, sizeof(h), sig, siglen, lk) == 1;
+
+ EC_KEY_free(lk);
+ EVP_MD_CTX_destroy(sha_ctx);
+ BN_free(bn_pub);
+ EC_POINT_free(ec_pub);
+#endif
+ }
+
+ return ret;
+}
+
+/*
+ * Bytes to allocate (e.g. via g_alloca) for an encryption context in the
+ * given mode; padded so the real state can be aligned with
+ * cryptobox_align_ptr inside the init functions.
+ */
+static gsize
+rspamd_cryptobox_encrypt_ctx_len(enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return sizeof(chacha_state) + CRYPTOBOX_ALIGNMENT;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ /* AES-GCM mode stores just a pointer to a heap EVP_CIPHER_CTX */
+ return sizeof(EVP_CIPHER_CTX *) + CRYPTOBOX_ALIGNMENT;
+#endif
+ }
+
+ return 0;
+}
+
+/* Bytes to allocate for an authentication (MAC) context in the given mode */
+static gsize
+rspamd_cryptobox_auth_ctx_len(enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return sizeof(crypto_onetimeauth_state) + RSPAMD_ALIGNOF(crypto_onetimeauth_state);
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ /* GCM authenticates inside the cipher ctx; only a pointer-sized stub */
+ return sizeof(void *);
+#endif
+ }
+
+ return 0;
+}
+
+/*
+ * Initialise an encryption context inside caller-provided storage.
+ * 25519 mode: XChaCha20 keyed by nm with the 24-byte nonce.
+ * NIST mode: heap-allocated EVP AES-256-GCM context (freed in cleanup).
+ * Returns the aligned pointer to use for subsequent update/final calls.
+ */
+static void *
+rspamd_cryptobox_encrypt_init(void *enc_ctx, const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ chacha_state *s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ xchacha_init(s,
+ (const chacha_key *) nm,
+ (const chacha_iv24 *) nonce,
+ 20);
+
+ return s;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ memset(s, 0, sizeof(*s));
+ *s = EVP_CIPHER_CTX_new();
+ g_assert(EVP_EncryptInit_ex(*s, EVP_aes_256_gcm(), NULL, NULL, NULL) == 1);
+ g_assert(EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_SET_IVLEN,
+ rspamd_cryptobox_nonce_bytes(mode), NULL) == 1);
+ g_assert(EVP_EncryptInit_ex(*s, NULL, NULL, nm, nonce) == 1);
+
+ return s;
+#endif
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialise the MAC context. In 25519 mode the Poly1305 one-time key is
+ * derived by encrypting one zero block with the just-initialised cipher
+ * (so this MUST run before any data is encrypted). In NIST/GCM mode the
+ * cipher context doubles as the auth context.
+ */
+static void *
+rspamd_cryptobox_auth_init(void *auth_ctx, void *enc_ctx,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_onetimeauth_state *mac_ctx;
+ guchar RSPAMD_ALIGNED(32) subkey[CHACHA_BLOCKBYTES];
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ memset(subkey, 0, sizeof(subkey));
+ /* Keystream of the first chacha block becomes the Poly1305 key */
+ chacha_update(enc_ctx, subkey, subkey, sizeof(subkey));
+ crypto_onetimeauth_init(mac_ctx, subkey);
+ rspamd_explicit_memzero(subkey, sizeof(subkey));
+
+ return mac_ctx;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ auth_ctx = enc_ctx;
+
+ return auth_ctx;
+#endif
+ }
+
+ return NULL;
+}
+
+/*
+ * Encrypt inlen bytes from `in` to `out` (in == out is the usual in-place
+ * call). *outlen, when requested, receives the number of bytes actually
+ * produced; chacha may buffer a partial block until encrypt_final.
+ */
+static gboolean
+rspamd_cryptobox_encrypt_update(void *enc_ctx, const guchar *in, gsize inlen,
+ guchar *out, gsize *outlen,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ gsize r;
+ chacha_state *s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+
+ r = chacha_update(s, in, out, inlen);
+
+ if (outlen != NULL) {
+ *outlen = r;
+ }
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = enc_ctx;
+ gint r;
+
+ r = inlen;
+ g_assert(EVP_EncryptUpdate(*s, out, &r, in, inlen) == 1);
+
+ if (outlen) {
+ *outlen = r;
+ }
+
+ return TRUE;
+#endif
+ }
+
+ return FALSE;
+}
+
+/*
+ * Feed `inlen` ciphertext bytes to the MAC. A no-op in GCM mode, where
+ * authentication happens inside EVP_EncryptUpdate.
+ */
+static gboolean
+rspamd_cryptobox_auth_update(void *auth_ctx, const guchar *in, gsize inlen,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_onetimeauth_state *mac_ctx;
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ crypto_onetimeauth_update(mac_ctx, in, inlen);
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ return TRUE;
+#endif
+ }
+
+ return FALSE;
+}
+
+/*
+ * Flush any buffered partial block to `out`; returns the number of bytes
+ * written (`remain` is only consulted by the OpenSSL branch).
+ */
+static gsize
+rspamd_cryptobox_encrypt_final(void *enc_ctx, guchar *out, gsize remain,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ chacha_state *s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ return chacha_final(s, out);
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = enc_ctx;
+ gint r = remain;
+
+ g_assert(EVP_EncryptFinal_ex(*s, out, &r) == 1);
+
+ return r;
+#endif
+ }
+
+ return 0;
+}
+
+/*
+ * Produce the 16-byte authentication tag into `sig`: Poly1305 final in
+ * 25519 mode, GCM tag extraction otherwise.
+ */
+static gboolean
+rspamd_cryptobox_auth_final(void *auth_ctx, rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_onetimeauth_state *mac_ctx;
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ crypto_onetimeauth_final(mac_ctx, sig);
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = auth_ctx;
+
+ g_assert(EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_GET_TAG,
+ sizeof(rspamd_mac_t), sig) == 1);
+
+ return TRUE;
+#endif
+ }
+
+ return FALSE;
+}
+
+/*
+ * Initialise a decryption context; mirrors rspamd_cryptobox_encrypt_init
+ * (XChaCha is symmetric, so the 25519 branch is identical to encryption).
+ */
+static void *
+rspamd_cryptobox_decrypt_init(void *enc_ctx, const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+
+ chacha_state *s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ xchacha_init(s,
+ (const chacha_key *) nm,
+ (const chacha_iv24 *) nonce,
+ 20);
+
+ return s;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ memset(s, 0, sizeof(*s));
+ *s = EVP_CIPHER_CTX_new();
+ g_assert(EVP_DecryptInit_ex(*s, EVP_aes_256_gcm(), NULL, NULL, NULL) == 1);
+ g_assert(EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_SET_IVLEN,
+ rspamd_cryptobox_nonce_bytes(mode), NULL) == 1);
+ g_assert(EVP_DecryptInit_ex(*s, NULL, NULL, nm, nonce) == 1);
+
+ return s;
+#endif
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialise the MAC-verification context; same Poly1305 subkey derivation
+ * as rspamd_cryptobox_auth_init (must run before any ciphertext is
+ * decrypted, since it consumes the first chacha keystream block).
+ */
+static void *
+rspamd_cryptobox_auth_verify_init(void *auth_ctx, void *enc_ctx,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_onetimeauth_state *mac_ctx;
+ guchar RSPAMD_ALIGNED(32) subkey[CHACHA_BLOCKBYTES];
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ memset(subkey, 0, sizeof(subkey));
+ chacha_update(enc_ctx, subkey, subkey, sizeof(subkey));
+ crypto_onetimeauth_init(mac_ctx, subkey);
+ rspamd_explicit_memzero(subkey, sizeof(subkey));
+
+ return mac_ctx;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ auth_ctx = enc_ctx;
+
+ return auth_ctx;
+#endif
+ }
+
+ return NULL;
+}
+
+/*
+ * Decrypt inlen bytes from `in` to `out`; *outlen receives the produced
+ * byte count when non-NULL.
+ * NOTE(review): unlike its siblings this function has no trailing
+ * `return FALSE;` -- if g_assert(0) in the !HAVE_USABLE_OPENSSL branch
+ * were compiled out, control would fall off the end (UB).
+ */
+static gboolean
+rspamd_cryptobox_decrypt_update(void *enc_ctx, const guchar *in, gsize inlen,
+ guchar *out, gsize *outlen,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ gsize r;
+ chacha_state *s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ r = chacha_update(s, in, out, inlen);
+
+ if (outlen != NULL) {
+ *outlen = r;
+ }
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = enc_ctx;
+ gint r;
+
+ r = outlen ? *outlen : inlen;
+ g_assert(EVP_DecryptUpdate(*s, out, &r, in, inlen) == 1);
+
+ if (outlen) {
+ *outlen = r;
+ }
+
+ return TRUE;
+#endif
+ }
+}
+
+/*
+ * Feed ciphertext to the verifier MAC (Poly1305 in 25519 mode).
+ * NOTE(review): in the OpenSSL build the else-branch falls through the
+ * empty #else/#endif to `return FALSE;` -- callers in this file ignore the
+ * return value, and GCM verifies via decrypt_final/auth_verify_final.
+ */
+static gboolean
+rspamd_cryptobox_auth_verify_update(void *auth_ctx,
+ const guchar *in, gsize inlen,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_onetimeauth_state *mac_ctx;
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ crypto_onetimeauth_update(mac_ctx, in, inlen);
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ /* We do not need to authenticate as a separate process */
+ return TRUE;
+#else
+#endif
+ }
+
+ return FALSE;
+}
+
+/*
+ * Flush the final partial block of plaintext. Returns FALSE only in the
+ * GCM branch when EVP_DecryptFinal_ex fails (tag mismatch).
+ */
+static gboolean
+rspamd_cryptobox_decrypt_final(void *enc_ctx, guchar *out, gsize remain,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ chacha_state *s;
+
+ s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT);
+ chacha_final(s, out);
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = enc_ctx;
+ gint r = remain;
+
+ if (EVP_DecryptFinal_ex(*s, out, &r) < 0) {
+ return FALSE;
+ }
+
+ return TRUE;
+#endif
+ }
+
+ return FALSE;
+}
+
+/*
+ * Compare the computed MAC against the expected tag `sig`.
+ * 25519 mode: constant-time compare via crypto_verify_16.
+ * GCM mode: only installs the expected tag; the actual check happens in
+ * EVP_DecryptFinal_ex (rspamd_cryptobox_decrypt_final).
+ */
+static gboolean
+rspamd_cryptobox_auth_verify_final(void *auth_ctx, const rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ rspamd_mac_t mac;
+ crypto_onetimeauth_state *mac_ctx;
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ crypto_onetimeauth_final(mac_ctx, mac);
+
+ if (crypto_verify_16(mac, sig) != 0) {
+ return FALSE;
+ }
+
+ return TRUE;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = auth_ctx;
+
+ if (EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_SET_TAG, 16, (guchar *) sig) != 1) {
+ return FALSE;
+ }
+
+ return TRUE;
+#endif
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * Wipe/free per-operation contexts.
+ * 25519 mode: zeroes the Poly1305 state (the stack chacha state is the
+ * caller's alloca and is not wiped here).
+ * GCM mode: frees the heap EVP ctx; auth_ctx aliases enc_ctx so it needs
+ * no separate teardown.
+ */
+static void
+rspamd_cryptobox_cleanup(void *enc_ctx, void *auth_ctx,
+ enum rspamd_cryptobox_mode mode)
+{
+ if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ crypto_onetimeauth_state *mac_ctx;
+
+ mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT);
+ rspamd_explicit_memzero(mac_ctx, sizeof(*mac_ctx));
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ EVP_CIPHER_CTX **s = enc_ctx;
+
+ EVP_CIPHER_CTX_cleanup(*s);
+ EVP_CIPHER_CTX_free(*s);
+#endif
+ }
+}
+
+/*
+ * Encrypt-then-MAC a contiguous buffer in place with a precomputed shared
+ * key `nm`; the 16-byte tag is written to `sig`. Contexts live on the
+ * caller's stack via g_alloca.
+ */
+void rspamd_cryptobox_encrypt_nm_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm,
+ rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ gsize r;
+ void *enc_ctx, *auth_ctx;
+
+ enc_ctx = g_alloca(rspamd_cryptobox_encrypt_ctx_len(mode));
+ auth_ctx = g_alloca(rspamd_cryptobox_auth_ctx_len(mode));
+
+ enc_ctx = rspamd_cryptobox_encrypt_init(enc_ctx, nonce, nm, mode);
+ auth_ctx = rspamd_cryptobox_auth_init(auth_ctx, enc_ctx, mode);
+
+ rspamd_cryptobox_encrypt_update(enc_ctx, data, len, data, &r, mode);
+ rspamd_cryptobox_encrypt_final(enc_ctx, data + r, len - r, mode);
+
+ /* MAC is computed over the ciphertext (encrypt-then-MAC) */
+ rspamd_cryptobox_auth_update(auth_ctx, data, len, mode);
+ rspamd_cryptobox_auth_final(auth_ctx, sig, mode);
+
+ rspamd_cryptobox_cleanup(enc_ctx, auth_ctx, mode);
+}
+
+/*
+ * Scatter `len` bytes from `buf` back into consecutive segments starting at
+ * `st`, beginning `offset` bytes into the first segment. Assumes the
+ * segment list has enough total capacity for `len` bytes.
+ */
+static void
+rspamd_cryptobox_flush_outbuf(struct rspamd_cryptobox_segment *st,
+ const guchar *buf, gsize len, gsize offset)
+{
+ gsize cpy_len;
+
+ while (len > 0) {
+ cpy_len = MIN(len, st->len - offset);
+ memcpy(st->data + offset, buf, cpy_len);
+ st++;
+ buf += cpy_len;
+ len -= cpy_len;
+ offset = 0;
+ }
+}
+
+/*
+ * Scatter-gather variant of encrypt_nm_inplace: encrypts `cnt` segments in
+ * place by gathering them through a fixed staging buffer (16 chacha blocks),
+ * encrypting+MACing each full staging buffer, and scattering ciphertext back
+ * into the source segments via rspamd_cryptobox_flush_outbuf.
+ * Bookkeeping: `remain` = free space left in outbuf, `start_seg`/`seg_offset`
+ * = where the next flush must begin writing. The intricate large-segment
+ * branch encrypts directly within the segment once outbuf has been drained.
+ */
+void rspamd_cryptobox_encryptv_nm_inplace(struct rspamd_cryptobox_segment *segments,
+ gsize cnt,
+ const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm, rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ struct rspamd_cryptobox_segment *cur = segments, *start_seg = segments;
+ guchar outbuf[CHACHA_BLOCKBYTES * 16];
+ void *enc_ctx, *auth_ctx;
+ guchar *out, *in;
+ gsize r, remain, inremain, seg_offset;
+
+ enc_ctx = g_alloca(rspamd_cryptobox_encrypt_ctx_len(mode));
+ auth_ctx = g_alloca(rspamd_cryptobox_auth_ctx_len(mode));
+
+ enc_ctx = rspamd_cryptobox_encrypt_init(enc_ctx, nonce, nm, mode);
+ auth_ctx = rspamd_cryptobox_auth_init(auth_ctx, enc_ctx, mode);
+
+ remain = sizeof(outbuf);
+ out = outbuf;
+ inremain = cur->len;
+ seg_offset = 0;
+
+ for (;;) {
+ if (cur - segments == (gint) cnt) {
+ break;
+ }
+
+ if (cur->len <= remain) {
+ /* Whole segment fits into the staging buffer */
+ memcpy(out, cur->data, cur->len);
+ remain -= cur->len;
+ out += cur->len;
+ cur++;
+
+ if (remain == 0) {
+ /* Staging buffer full: encrypt, MAC and scatter it back */
+ rspamd_cryptobox_encrypt_update(enc_ctx, outbuf, sizeof(outbuf),
+ outbuf, NULL, mode);
+ rspamd_cryptobox_auth_update(auth_ctx, outbuf, sizeof(outbuf),
+ mode);
+ rspamd_cryptobox_flush_outbuf(start_seg, outbuf,
+ sizeof(outbuf), seg_offset);
+ start_seg = cur;
+ seg_offset = 0;
+ remain = sizeof(outbuf);
+ out = outbuf;
+ }
+ }
+ else {
+ /* Segment larger than the remaining staging space: top up the
+ * buffer, flush it, then stream the rest of the segment */
+ memcpy(out, cur->data, remain);
+ rspamd_cryptobox_encrypt_update(enc_ctx, outbuf, sizeof(outbuf),
+ outbuf, NULL, mode);
+ rspamd_cryptobox_auth_update(auth_ctx, outbuf, sizeof(outbuf),
+ mode);
+ rspamd_cryptobox_flush_outbuf(start_seg, outbuf, sizeof(outbuf),
+ seg_offset);
+ seg_offset = 0;
+
+ inremain = cur->len - remain;
+ in = cur->data + remain;
+ out = outbuf;
+ remain = 0;
+ start_seg = cur;
+
+ while (inremain > 0) {
+ if (sizeof(outbuf) <= inremain) {
+ /* Full staging-buffer-sized chunk encrypted via outbuf
+ * and copied straight back into the segment */
+ memcpy(outbuf, in, sizeof(outbuf));
+ rspamd_cryptobox_encrypt_update(enc_ctx,
+ outbuf,
+ sizeof(outbuf),
+ outbuf,
+ NULL,
+ mode);
+ rspamd_cryptobox_auth_update(auth_ctx,
+ outbuf,
+ sizeof(outbuf),
+ mode);
+ memcpy(in, outbuf, sizeof(outbuf));
+ in += sizeof(outbuf);
+ inremain -= sizeof(outbuf);
+ remain = sizeof(outbuf);
+ }
+ else {
+ /* Tail of the segment stays buffered for the next round */
+ memcpy(outbuf, in, inremain);
+ remain = sizeof(outbuf) - inremain;
+ out = outbuf + inremain;
+ inremain = 0;
+ }
+ }
+
+ seg_offset = cur->len - (sizeof(outbuf) - remain);
+ cur++;
+ }
+ }
+
+ /* Final partial staging buffer: encrypt, MAC, emit tag and scatter */
+ rspamd_cryptobox_encrypt_update(enc_ctx, outbuf, sizeof(outbuf) - remain,
+ outbuf, &r, mode);
+ out = outbuf + r;
+ rspamd_cryptobox_encrypt_final(enc_ctx, out, sizeof(outbuf) - remain - r,
+ mode);
+
+ rspamd_cryptobox_auth_update(auth_ctx, outbuf, sizeof(outbuf) - remain,
+ mode);
+ rspamd_cryptobox_auth_final(auth_ctx, sig, mode);
+
+ rspamd_cryptobox_flush_outbuf(start_seg, outbuf, sizeof(outbuf) - remain,
+ seg_offset);
+ rspamd_cryptobox_cleanup(enc_ctx, auth_ctx, mode);
+}
+
+/*
+ * Verify-then-decrypt a contiguous buffer in place with precomputed `nm`.
+ * The MAC over the ciphertext is checked BEFORE decryption; on mismatch the
+ * data is left encrypted and FALSE is returned.
+ */
+gboolean
+rspamd_cryptobox_decrypt_nm_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce, const rspamd_nm_t nm,
+ const rspamd_mac_t sig, enum rspamd_cryptobox_mode mode)
+{
+ gsize r = 0;
+ gboolean ret = TRUE;
+ void *enc_ctx, *auth_ctx;
+
+ enc_ctx = g_alloca(rspamd_cryptobox_encrypt_ctx_len(mode));
+ auth_ctx = g_alloca(rspamd_cryptobox_auth_ctx_len(mode));
+
+ enc_ctx = rspamd_cryptobox_decrypt_init(enc_ctx, nonce, nm, mode);
+ auth_ctx = rspamd_cryptobox_auth_verify_init(auth_ctx, enc_ctx, mode);
+
+ rspamd_cryptobox_auth_verify_update(auth_ctx, data, len, mode);
+
+ if (!rspamd_cryptobox_auth_verify_final(auth_ctx, sig, mode)) {
+ ret = FALSE;
+ }
+ else {
+ rspamd_cryptobox_decrypt_update(enc_ctx, data, len, data, &r, mode);
+ ret = rspamd_cryptobox_decrypt_final(enc_ctx, data + r, len - r, mode);
+ }
+
+ rspamd_cryptobox_cleanup(enc_ctx, auth_ctx, mode);
+
+ return ret;
+}
+
+/*
+ * Public-key convenience wrappers: derive the shared key via
+ * rspamd_cryptobox_nm, delegate to the *_nm_inplace worker, then wipe the
+ * derived key from the stack.
+ */
+gboolean
+rspamd_cryptobox_decrypt_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_pk_t pk, const rspamd_sk_t sk,
+ const rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ guchar nm[rspamd_cryptobox_MAX_NMBYTES];
+ gboolean ret;
+
+ rspamd_cryptobox_nm(nm, pk, sk, mode);
+ ret = rspamd_cryptobox_decrypt_nm_inplace(data, len, nonce, nm, sig, mode);
+
+ rspamd_explicit_memzero(nm, sizeof(nm));
+
+ return ret;
+}
+
+/* See decrypt_inplace above; same key derivation, encryption direction */
+void rspamd_cryptobox_encrypt_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_pk_t pk, const rspamd_sk_t sk,
+ rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ guchar nm[rspamd_cryptobox_MAX_NMBYTES];
+
+ rspamd_cryptobox_nm(nm, pk, sk, mode);
+ rspamd_cryptobox_encrypt_nm_inplace(data, len, nonce, nm, sig, mode);
+ rspamd_explicit_memzero(nm, sizeof(nm));
+}
+
+/* Scatter-gather variant of encrypt_inplace */
+void rspamd_cryptobox_encryptv_inplace(struct rspamd_cryptobox_segment *segments,
+ gsize cnt,
+ const rspamd_nonce_t nonce,
+ const rspamd_pk_t pk, const rspamd_sk_t sk,
+ rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode)
+{
+ guchar nm[rspamd_cryptobox_MAX_NMBYTES];
+
+ rspamd_cryptobox_nm(nm, pk, sk, mode);
+ rspamd_cryptobox_encryptv_nm_inplace(segments, cnt, nonce, nm, sig, mode);
+ rspamd_explicit_memzero(nm, sizeof(nm));
+}
+
+
+/* Thin wrapper over libsodium's SipHash-2-4 short-input hash */
+void rspamd_cryptobox_siphash(unsigned char *out, const unsigned char *in,
+ unsigned long long inlen,
+ const rspamd_sipkey_t k)
+{
+ crypto_shorthash_siphash24(out, in, inlen, k);
+}
+
+/*
+ * Password-Based Key Derivation Function 2 (PKCS #5 v2.0).
+ * Code based on IEEE Std 802.11-2007, Annex H.4.2.
+ *
+ * PBKDF2 with BLAKE2b in place of HMAC: each round keys blake2b with the
+ * password (or, for oversized passwords, with blake2b(password)).
+ * Returns FALSE on invalid parameters (rounds < 1, empty key or salt,
+ * salt too large to append the 4-byte block counter).
+ */
+static gboolean
+rspamd_cryptobox_pbkdf2(const char *pass, gsize pass_len,
+ const guint8 *salt, gsize salt_len, guint8 *key, gsize key_len,
+ unsigned int rounds)
+{
+ guint8 *asalt, obuf[crypto_generichash_blake2b_BYTES_MAX];
+ guint8 d1[crypto_generichash_blake2b_BYTES_MAX],
+ d2[crypto_generichash_blake2b_BYTES_MAX];
+ unsigned int i, j;
+ unsigned int count;
+ gsize r;
+
+ if (rounds < 1 || key_len == 0) {
+ return FALSE;
+ }
+ if (salt_len == 0 || salt_len > G_MAXSIZE - 4) {
+ return FALSE;
+ }
+
+ /* salt || big-endian 32-bit block index, per PBKDF2 */
+ asalt = g_malloc(salt_len + 4);
+ memcpy(asalt, salt, salt_len);
+
+ for (count = 1; key_len > 0; count++) {
+ asalt[salt_len + 0] = (count >> 24) & 0xff;
+ asalt[salt_len + 1] = (count >> 16) & 0xff;
+ asalt[salt_len + 2] = (count >> 8) & 0xff;
+ asalt[salt_len + 3] = count & 0xff;
+
+ /* U_1 = PRF(pass, salt || count) */
+ if (pass_len <= crypto_generichash_blake2b_KEYBYTES_MAX) {
+ crypto_generichash_blake2b(d1, sizeof(d1), asalt, salt_len + 4,
+ pass, pass_len);
+ }
+ else {
+ guint8 k[crypto_generichash_blake2b_BYTES_MAX];
+
+ /*
+ * We use additional blake2 iteration to store large key
+ * XXX: it is not compatible with the original implementation but safe
+ */
+ crypto_generichash_blake2b(k, sizeof(k), pass, pass_len,
+ NULL, 0);
+ crypto_generichash_blake2b(d1, sizeof(d1), asalt, salt_len + 4,
+ k, sizeof(k));
+ }
+
+ memcpy(obuf, d1, sizeof(obuf));
+
+ /* U_i = PRF(pass, U_{i-1}); T = U_1 ^ ... ^ U_rounds */
+ for (i = 1; i < rounds; i++) {
+ if (pass_len <= crypto_generichash_blake2b_KEYBYTES_MAX) {
+ crypto_generichash_blake2b(d2, sizeof(d2), d1, sizeof(d1),
+ pass, pass_len);
+ }
+ else {
+ guint8 k[crypto_generichash_blake2b_BYTES_MAX];
+
+ /*
+ * We use additional blake2 iteration to store large key
+ * XXX: it is not compatible with the original implementation but safe
+ */
+ crypto_generichash_blake2b(k, sizeof(k), pass, pass_len,
+ NULL, 0);
+ crypto_generichash_blake2b(d2, sizeof(d2), d1, sizeof(d1),
+ k, sizeof(k));
+ }
+
+ memcpy(d1, d2, sizeof(d1));
+
+ for (j = 0; j < sizeof(obuf); j++) {
+ obuf[j] ^= d1[j];
+ }
+ }
+
+ /* Emit one hash-sized block of derived key per outer iteration */
+ r = MIN(key_len, crypto_generichash_blake2b_BYTES_MAX);
+ memcpy(key, obuf, r);
+ key += r;
+ key_len -= r;
+ }
+
+ /* Wipe all intermediate key material */
+ rspamd_explicit_memzero(asalt, salt_len + 4);
+ g_free(asalt);
+ rspamd_explicit_memzero(d1, sizeof(d1));
+ rspamd_explicit_memzero(d2, sizeof(d2));
+ rspamd_explicit_memzero(obuf, sizeof(obuf));
+
+ return TRUE;
+}
+
+/*
+ * Public PBKDF dispatcher: selects between the catena memory-hard KDF
+ * (with "rspamd" as associated data, lambda = 4, min/max garlic = complexity)
+ * and the blake2b-based PBKDF2 above (complexity = rounds).
+ * Unknown types fall through to PBKDF2.  Returns TRUE on success.
+ */
+gboolean
+rspamd_cryptobox_pbkdf(const char *pass, gsize pass_len,
+ const guint8 *salt, gsize salt_len, guint8 *key, gsize key_len,
+ unsigned int complexity, enum rspamd_cryptobox_pbkdf_type type)
+{
+ gboolean ret = FALSE;
+
+ switch (type) {
+ case RSPAMD_CRYPTOBOX_CATENA:
+ if (catena(pass, pass_len, salt, salt_len, "rspamd", 6,
+ 4, complexity, complexity, key_len, key) == 0) {
+ ret = TRUE;
+ }
+ break;
+ case RSPAMD_CRYPTOBOX_PBKDF2:
+ default:
+ ret = rspamd_cryptobox_pbkdf2(pass, pass_len, salt, salt_len, key,
+ key_len, complexity);
+ break;
+ }
+
+ return ret;
+}
+
+/* KEX public key size: 32 for curve25519, 65 for NIST (uncompressed EC point) */
+guint rspamd_cryptobox_pk_bytes(enum rspamd_cryptobox_mode mode)
+{
+ if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return 32;
+ }
+ else {
+ return 65;
+ }
+}
+
+/* Signing public key size: 32 for ed25519, 65 for NIST */
+guint rspamd_cryptobox_pk_sig_bytes(enum rspamd_cryptobox_mode mode)
+{
+ if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return 32;
+ }
+ else {
+ return 65;
+ }
+}
+
+/* Nonce size: 24 (xchacha-style) for 25519 mode, 16 for NIST mode */
+guint rspamd_cryptobox_nonce_bytes(enum rspamd_cryptobox_mode mode)
+{
+ if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return 24;
+ }
+ else {
+ return 16;
+ }
+}
+
+
+/* KEX secret key size: 32 bytes in both modes */
+guint rspamd_cryptobox_sk_bytes(enum rspamd_cryptobox_mode mode)
+{
+ return 32;
+}
+
+/* Signing secret key size: 64 for ed25519 (seed || pubkey), 32 for NIST */
+guint rspamd_cryptobox_sk_sig_bytes(enum rspamd_cryptobox_mode mode)
+{
+ if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return 64;
+ }
+ else {
+ return 32;
+ }
+}
+
+/*
+ * Maximum digital signature size: 64 for ed25519; for NIST the ECDSA
+ * size is queried from OpenSSL once and cached.
+ * NOTE(review): the static `ssl_keylen` cache is written without
+ * synchronization and `EC_KEY_new_by_curve_name` is not checked for NULL
+ * before ECDSA_size() — confirm this is only reached single-threaded /
+ * after successful library init.
+ */
+guint rspamd_cryptobox_signature_bytes(enum rspamd_cryptobox_mode mode)
+{
+ static guint ssl_keylen;
+
+ if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) {
+ return 64;
+ }
+ else {
+#ifndef HAVE_USABLE_OPENSSL
+ g_assert(0);
+#else
+ if (ssl_keylen == 0) {
+ EC_KEY *lk;
+ lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID);
+ ssl_keylen = ECDSA_size(lk);
+ EC_KEY_free(lk);
+ }
+#endif
+ return ssl_keylen;
+ }
+}
+
+/* Shared (precomputed) key size: 32 bytes in both modes */
+guint rspamd_cryptobox_nm_bytes(enum rspamd_cryptobox_mode mode)
+{
+ return 32;
+}
+
+/* MAC size: 16 bytes in both modes */
+guint rspamd_cryptobox_mac_bytes(enum rspamd_cryptobox_mode mode)
+{
+ return 16;
+}
+
+/*
+ * Init keyed (or unkeyed if keylen == 0) blake2b IUF hashing.
+ * The caller-supplied state buffer is over-allocated; the real
+ * blake2b state is placed at the first properly aligned offset inside it,
+ * hence cryptobox_align_ptr in every IUF function below.
+ */
+void rspamd_cryptobox_hash_init(rspamd_cryptobox_hash_state_t *p, const guchar *key, gsize keylen)
+{
+ crypto_generichash_blake2b_state *st = cryptobox_align_ptr(p,
+ RSPAMD_ALIGNOF(crypto_generichash_blake2b_state));
+ crypto_generichash_blake2b_init(st, key, keylen,
+ crypto_generichash_blake2b_BYTES_MAX);
+}
+
+/**
+ * Update hash with data portion
+ */
+void rspamd_cryptobox_hash_update(rspamd_cryptobox_hash_state_t *p, const guchar *data, gsize len)
+{
+ crypto_generichash_blake2b_state *st = cryptobox_align_ptr(p,
+ RSPAMD_ALIGNOF(crypto_generichash_blake2b_state));
+ crypto_generichash_blake2b_update(st, data, len);
+}
+
+/**
+ * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length
+ */
+void rspamd_cryptobox_hash_final(rspamd_cryptobox_hash_state_t *p, guchar *out)
+{
+ crypto_generichash_blake2b_state *st = cryptobox_align_ptr(p,
+ RSPAMD_ALIGNOF(crypto_generichash_blake2b_state));
+ crypto_generichash_blake2b_final(st, out, crypto_generichash_blake2b_BYTES_MAX);
+}
+
+/**
+ * One in all function: keyed (or unkeyed) blake2b of `data`, writing
+ * crypto_generichash_blake2b_BYTES_MAX (64) bytes to `out`
+ */
+void rspamd_cryptobox_hash(guchar *out,
+ const guchar *data,
+ gsize len,
+ const guchar *key,
+ gsize keylen)
+{
+ crypto_generichash_blake2b(out, crypto_generichash_blake2b_BYTES_MAX,
+ data, len, key, keylen);
+}
+
+/* The opaque storage of the fast-hash state must fit every backend state */
+G_STATIC_ASSERT(sizeof(t1ha_context_t) <=
+ sizeof(((rspamd_cryptobox_fast_hash_state_t *) NULL)->opaque));
+G_STATIC_ASSERT(sizeof(struct XXH3_state_s) <=
+ sizeof(((rspamd_cryptobox_fast_hash_state_t *) NULL)->opaque));
+
+
+/*
+ * Incremental state for mumhash:
+ *  - buf: pending (partial) 8-byte input block
+ *  - h:   running hash value
+ *  - rem: bytes still NEEDED to complete buf (0 = no partial block pending)
+ */
+struct RSPAMD_ALIGNED(16) _mum_iuf {
+ union {
+ gint64 ll;
+ unsigned char b[sizeof(guint64)];
+ } buf;
+ gint64 h;
+ unsigned rem;
+};
+
+/*
+ * Allocates a fast-hash state with the alignment its struct declares
+ * (posix_memalign, not g_malloc); aborts on allocation failure.
+ * Must be released with rspamd_cryptobox_fast_hash_free().
+ */
+rspamd_cryptobox_fast_hash_state_t *
+rspamd_cryptobox_fast_hash_new(void)
+{
+ rspamd_cryptobox_fast_hash_state_t *nst;
+ int ret = posix_memalign((void **) &nst, RSPAMD_ALIGNOF(rspamd_cryptobox_fast_hash_state_t),
+ sizeof(rspamd_cryptobox_fast_hash_state_t));
+
+ if (ret != 0) {
+ abort();
+ }
+
+ return nst;
+}
+
+/* plain free(), matching the posix_memalign allocation above */
+void rspamd_cryptobox_fast_hash_free(rspamd_cryptobox_fast_hash_state_t *st)
+{
+ free(st);
+}
+
+/* Default init: always selects the XXH3-64 backend with the given seed */
+void rspamd_cryptobox_fast_hash_init(rspamd_cryptobox_fast_hash_state_t *st,
+ guint64 seed)
+{
+ XXH3_state_t *xst = (XXH3_state_t *) st->opaque;
+ st->type = RSPAMD_CRYPTOBOX_XXHASH3;
+ XXH3_INITSTATE(xst);
+ XXH3_64bits_reset_withSeed(xst, seed);
+}
+
+/*
+ * Init with an explicit backend.  T1HA, HASHFAST and
+ * HASHFAST_INDEPENDENT all map to the t1ha2 streaming state
+ * (st->type is normalized to RSPAMD_CRYPTOBOX_T1HA).
+ */
+void rspamd_cryptobox_fast_hash_init_specific(rspamd_cryptobox_fast_hash_state_t *st,
+ enum rspamd_cryptobox_fast_hash_type type,
+ guint64 seed)
+{
+ switch (type) {
+ case RSPAMD_CRYPTOBOX_T1HA:
+ case RSPAMD_CRYPTOBOX_HASHFAST:
+ case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: {
+ t1ha_context_t *rst = (t1ha_context_t *) st->opaque;
+ st->type = RSPAMD_CRYPTOBOX_T1HA;
+ t1ha2_init(rst, seed, 0);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH64: {
+ XXH64_state_t *xst = (XXH64_state_t *) st->opaque;
+ memset(xst, 0, sizeof(*xst));
+ st->type = RSPAMD_CRYPTOBOX_XXHASH64;
+ XXH64_reset(xst, seed);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH32: {
+ XXH32_state_t *xst = (XXH32_state_t *) st->opaque;
+ memset(xst, 0, sizeof(*xst));
+ st->type = RSPAMD_CRYPTOBOX_XXHASH32;
+ XXH32_reset(xst, seed);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH3: {
+ XXH3_state_t *xst = (XXH3_state_t *) st->opaque;
+ XXH3_INITSTATE(xst);
+ st->type = RSPAMD_CRYPTOBOX_XXHASH3;
+ XXH3_64bits_reset_withSeed(xst, seed);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_MUMHASH: {
+ struct _mum_iuf *iuf = (struct _mum_iuf *) st->opaque;
+ st->type = RSPAMD_CRYPTOBOX_MUMHASH;
+ iuf->h = seed;
+ iuf->buf.ll = 0;
+ iuf->rem = 0;
+ break;
+ }
+ }
+}
+
+/*
+ * Feed `len` bytes into the streaming state; dispatches on the backend
+ * selected at init time.  The mumhash branch buffers input into 8-byte
+ * blocks itself since mum has no native streaming API here.
+ */
+void rspamd_cryptobox_fast_hash_update(rspamd_cryptobox_fast_hash_state_t *st,
+ const void *data, gsize len)
+{
+ if (st->type == RSPAMD_CRYPTOBOX_T1HA) {
+ t1ha_context_t *rst = (t1ha_context_t *) st->opaque;
+ t1ha2_update(rst, data, len);
+ }
+ else {
+ switch (st->type) {
+ case RSPAMD_CRYPTOBOX_XXHASH64: {
+ XXH64_state_t *xst = (XXH64_state_t *) st->opaque;
+ XXH64_update(xst, data, len);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH32: {
+ XXH32_state_t *xst = (XXH32_state_t *) st->opaque;
+ XXH32_update(xst, data, len);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH3: {
+ XXH3_state_t *xst = (XXH3_state_t *) st->opaque;
+ XXH3_64bits_update(xst, data, len);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_MUMHASH: {
+ struct _mum_iuf *iuf = (struct _mum_iuf *) st->opaque;
+ gsize drem = len;
+ const guchar *p = data;
+
+ if (iuf->rem > 0) {
+ /* Process remainder */
+ if (drem >= iuf->rem) {
+ /* Enough new data to complete the pending block: fill and mix it */
+ memcpy(iuf->buf.b + sizeof(iuf->buf.ll) - iuf->rem,
+ p, iuf->rem);
+ drem -= iuf->rem;
+ p += iuf->rem;
+ iuf->h = mum_hash_step(iuf->h, iuf->buf.ll);
+ iuf->rem = 0;
+ }
+ else {
+ /* Still short of a full block: append and wait for more data */
+ memcpy(iuf->buf.b + sizeof(iuf->buf.ll) - iuf->rem, p, drem);
+ iuf->rem -= drem;
+ drem = 0;
+ }
+ }
+
+ /* Mix whole 8-byte blocks directly */
+ while (drem >= sizeof(iuf->buf.ll)) {
+ memcpy(iuf->buf.b, p, sizeof(iuf->buf.ll));
+ iuf->h = mum_hash_step(iuf->h, iuf->buf.ll);
+ drem -= sizeof(iuf->buf.ll);
+ p += sizeof(iuf->buf.ll);
+ }
+
+ /* Leftover */
+ if (drem > 0) {
+ /* rem counts bytes still NEEDED to complete the block */
+ iuf->rem = sizeof(guint64) - drem;
+ iuf->buf.ll = 0;
+ memcpy(iuf->buf.b, p, drem);
+ }
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_T1HA:
+ case RSPAMD_CRYPTOBOX_HASHFAST:
+ case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: {
+ t1ha_context_t *rst = (t1ha_context_t *) st->opaque;
+ t1ha2_update(rst, data, len);
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Finalize the streaming state and return the 64-bit digest.
+ * NOTE(review): in the mumhash branch buf.ll is mixed unconditionally;
+ * when the total input length was an exact multiple of 8, buf.ll still
+ * holds the last full block that update() already mixed, so it is mixed
+ * twice — deterministic, but verify against the intended mum semantics
+ * if equivalence with one-shot mum_hash() matters.
+ */
+guint64
+rspamd_cryptobox_fast_hash_final(rspamd_cryptobox_fast_hash_state_t *st)
+{
+ guint64 ret;
+
+ if (st->type == RSPAMD_CRYPTOBOX_T1HA) {
+ t1ha_context_t *rst = (t1ha_context_t *) st->opaque;
+
+ return t1ha2_final(rst, NULL);
+ }
+ else {
+ switch (st->type) {
+ case RSPAMD_CRYPTOBOX_XXHASH64: {
+ XXH64_state_t *xst = (XXH64_state_t *) st->opaque;
+ ret = XXH64_digest(xst);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH32: {
+ XXH32_state_t *xst = (XXH32_state_t *) st->opaque;
+ ret = XXH32_digest(xst);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_XXHASH3: {
+ XXH3_state_t *xst = (XXH3_state_t *) st->opaque;
+ ret = XXH3_64bits_digest(xst);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_MUMHASH: {
+ struct _mum_iuf *iuf = (struct _mum_iuf *) st->opaque;
+ iuf->h = mum_hash_step(iuf->h, iuf->buf.ll);
+ ret = mum_hash_finish(iuf->h);
+ break;
+ }
+ case RSPAMD_CRYPTOBOX_T1HA:
+ case RSPAMD_CRYPTOBOX_HASHFAST:
+ case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: {
+ t1ha_context_t *rst = (t1ha_context_t *) st->opaque;
+
+ ret = t1ha2_final(rst, NULL);
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * One in all function
+ */
+/* Machine-dependent "best" one-shot hash: currently XXH3-64 everywhere */
+static inline guint64
+rspamd_cryptobox_fast_hash_machdep(const void *data,
+ gsize len, guint64 seed)
+{
+ return XXH3_64bits_withSeed(data, len, seed);
+}
+
+/* Platform-independent one-shot hash; same XXH3-64 implementation today */
+static inline guint64
+rspamd_cryptobox_fast_hash_indep(const void *data,
+ gsize len, guint64 seed)
+{
+ return XXH3_64bits_withSeed(data, len, seed);
+}
+
+/* Public one-shot entry point, delegates to the machdep variant */
+guint64
+rspamd_cryptobox_fast_hash(const void *data,
+ gsize len, guint64 seed)
+{
+ return rspamd_cryptobox_fast_hash_machdep(data, len, seed);
+}
+
+/*
+ * One-shot hash with explicit backend selection; unknown/HASHFAST
+ * types fall through to the machdep default.
+ */
+guint64
+rspamd_cryptobox_fast_hash_specific(
+ enum rspamd_cryptobox_fast_hash_type type,
+ const void *data,
+ gsize len, guint64 seed)
+{
+ switch (type) {
+ case RSPAMD_CRYPTOBOX_XXHASH32:
+ return XXH32(data, len, seed);
+ case RSPAMD_CRYPTOBOX_XXHASH3:
+ return XXH3_64bits_withSeed(data, len, seed);
+ case RSPAMD_CRYPTOBOX_XXHASH64:
+ return XXH64(data, len, seed);
+ case RSPAMD_CRYPTOBOX_MUMHASH:
+ return mum_hash(data, len, seed);
+ case RSPAMD_CRYPTOBOX_T1HA:
+ return t1ha2_atonce(data, len, seed);
+ case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT:
+ return rspamd_cryptobox_fast_hash_indep(data, len, seed);
+ case RSPAMD_CRYPTOBOX_HASHFAST:
+ default:
+ return rspamd_cryptobox_fast_hash_machdep(data, len, seed);
+ }
+}
diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
new file mode 100644
index 0000000..8cd79bb
--- /dev/null
+++ b/src/libcryptobox/cryptobox.h
@@ -0,0 +1,437 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef CRYPTOBOX_H_
+#define CRYPTOBOX_H_
+
+#include "config.h"
+
+#include <sodium.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_cryptobox_segment {
+ guchar *data;
+ gsize len;
+};
+
+#if defined(__GNUC__) && \
+ ((defined(__clang__) && (__clang_major__ >= 4 || (__clang_major__ >= 3 && __clang_minor__ >= 8))) || \
+ ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8) || (__GNUC__ > 4)))
+#define RSPAMD_HAS_TARGET_ATTR 1
+#endif
+
+#define rspamd_cryptobox_MAX_NONCEBYTES 24
+#define rspamd_cryptobox_MAX_PKBYTES 65
+#define rspamd_cryptobox_MAX_SKBYTES 32
+#define rspamd_cryptobox_MAX_MACBYTES 16
+#define rspamd_cryptobox_MAX_NMBYTES 32
+#define rspamd_cryptobox_SIPKEYBYTES 16
+#define rspamd_cryptobox_HASHBYTES 64
+#define rspamd_cryptobox_HASHKEYBYTES 64
+#define rspamd_cryptobox_HASHSTATEBYTES sizeof(crypto_generichash_blake2b_state) + 64
+#define rspamd_cryptobox_MAX_SIGSKBYTES 64
+#define rspamd_cryptobox_MAX_SIGPKBYTES 32
+#define rspamd_cryptobox_MAX_SIGBYTES 72
+
+#define CPUID_AVX2 0x1
+#define CPUID_AVX 0x2
+#define CPUID_SSE2 0x4
+#define CPUID_SSE3 0x8
+#define CPUID_SSSE3 0x10
+#define CPUID_SSE41 0x20
+#define CPUID_SSE42 0x40
+#define CPUID_RDRAND 0x80
+
+typedef guchar rspamd_pk_t[rspamd_cryptobox_MAX_PKBYTES];
+typedef guchar rspamd_sk_t[rspamd_cryptobox_MAX_SKBYTES];
+typedef guchar rspamd_mac_t[rspamd_cryptobox_MAX_MACBYTES];
+typedef guchar rspamd_nm_t[rspamd_cryptobox_MAX_NMBYTES];
+typedef guchar rspamd_nonce_t[rspamd_cryptobox_MAX_NONCEBYTES];
+typedef guchar rspamd_sipkey_t[rspamd_cryptobox_SIPKEYBYTES];
+typedef guchar rspamd_signature_t[rspamd_cryptobox_MAX_SIGBYTES];
+typedef guchar rspamd_sig_pk_t[rspamd_cryptobox_MAX_SIGPKBYTES];
+typedef guchar rspamd_sig_sk_t[rspamd_cryptobox_MAX_SIGSKBYTES];
+
+enum rspamd_cryptobox_mode {
+ RSPAMD_CRYPTOBOX_MODE_25519 = 0,
+ RSPAMD_CRYPTOBOX_MODE_NIST
+};
+
+struct rspamd_cryptobox_library_ctx {
+ gchar *cpu_extensions;
+ const gchar *chacha20_impl;
+ const gchar *base64_impl;
+ unsigned long cpu_config;
+};
+
+/**
+ * Init cryptobox library
+ */
+struct rspamd_cryptobox_library_ctx *rspamd_cryptobox_init(void);
+
+void rspamd_cryptobox_deinit(struct rspamd_cryptobox_library_ctx *);
+/**
+ * Generate new keypair
+ * @param pk public key buffer
+ * @param sk secret key buffer
+ */
+void rspamd_cryptobox_keypair(rspamd_pk_t pk, rspamd_sk_t sk,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Generate new keypair for signing
+ * @param pk public key buffer
+ * @param sk secret key buffer
+ */
+void rspamd_cryptobox_keypair_sig(rspamd_sig_pk_t pk, rspamd_sig_sk_t sk,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Encrypt data inplace adding signature to sig afterwards
+ * @param data input buffer
+ * @param pk remote pubkey
+ * @param sk local secret key
+ * @param sig output signature
+ */
+void rspamd_cryptobox_encrypt_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_pk_t pk, const rspamd_sk_t sk, rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Encrypt segments of data inplace adding signature to sig afterwards
+ * @param segments segments of data
+ * @param cnt count of segments
+ * @param pk remote pubkey
+ * @param sk local secret key
+ * @param sig output signature
+ */
+void rspamd_cryptobox_encryptv_inplace(struct rspamd_cryptobox_segment *segments,
+ gsize cnt,
+ const rspamd_nonce_t nonce,
+ const rspamd_pk_t pk, const rspamd_sk_t sk, rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode);
+
+
+/**
+ * Decrypt and verify data chunk inplace
+ * @param data data to decrypt
+ * @param len length of data
+ * @param pk remote pubkey
+ * @param sk local privkey
+ * @param sig signature input
+ * @return TRUE if input has been verified successfully
+ */
+gboolean rspamd_cryptobox_decrypt_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_pk_t pk, const rspamd_sk_t sk, const rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Encrypt data chunk inplace using a precomputed shared key, writing the MAC to sig
+ * @param data data to encrypt
+ * @param len length of data
+ * @param nonce crypto nonce
+ * @param nm precomputed shared secret
+ * @param sig output MAC signature
+ */
+void rspamd_cryptobox_encrypt_nm_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm, rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Encrypt segments of data inplace using a precomputed shared key, writing the MAC to sig
+ * @param segments segments of data
+ * @param cnt count of segments
+ * @param nonce crypto nonce
+ * @param nm precomputed shared secret
+ * @param sig output MAC signature
+ */
+void rspamd_cryptobox_encryptv_nm_inplace(struct rspamd_cryptobox_segment *segments,
+ gsize cnt,
+ const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm, rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode);
+
+
+/**
+ * Decrypt and verify data chunk inplace using a precomputed shared key
+ * @param data data to decrypt
+ * @param len length of data
+ * @param nonce crypto nonce
+ * @param nm precomputed shared secret
+ * @param sig MAC signature input
+ * @return TRUE if input has been verified successfully
+ */
+gboolean rspamd_cryptobox_decrypt_nm_inplace(guchar *data, gsize len,
+ const rspamd_nonce_t nonce,
+ const rspamd_nm_t nm, const rspamd_mac_t sig,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Generate shared secret from local sk and remote pk
+ * @param nm shared secret
+ * @param pk remote pubkey
+ * @param sk local privkey
+ */
+void rspamd_cryptobox_nm(rspamd_nm_t nm, const rspamd_pk_t pk,
+ const rspamd_sk_t sk, enum rspamd_cryptobox_mode mode);
+
+/**
+ * Create digital signature for the specified message and place result in `sig`
+ * @param sig signature target
+ * @param siglen_p pointer to signature length (might be NULL)
+ * @param m input message
+ * @param mlen input length
+ * @param sk secret key
+ */
+void rspamd_cryptobox_sign(guchar *sig, unsigned long long *siglen_p,
+ const guchar *m, gsize mlen,
+ const rspamd_sk_t sk,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Verifies digital signature for the specified message using the specified
+ * pubkey
+ * @param sig signature source
+ * @param m input message
+ * @param mlen message length
+ * @param pk public key for verification
+ * @return true if signature is valid, false otherwise
+ */
+bool rspamd_cryptobox_verify(const guchar *sig,
+ gsize siglen,
+ const guchar *m,
+ gsize mlen,
+ const rspamd_pk_t pk,
+ enum rspamd_cryptobox_mode mode);
+
+/**
+ * Securely clear the buffer specified
+ * @param buf buffer to zero
+ * @param buflen length of buffer
+ */
+
+#define rspamd_explicit_memzero sodium_memzero
+
+/**
+ * Constant time memcmp
+ * @param b1_
+ * @param b2_
+ * @param len
+ * @return
+ */
+#define rspamd_cryptobox_memcmp sodium_memcmp
+
+/**
+ * Calculates siphash-2-4 for a message
+ * @param out (8 bytes output)
+ * @param in
+ * @param inlen
+ * @param k key (must be 16 bytes)
+ */
+void rspamd_cryptobox_siphash(unsigned char *out, const unsigned char *in,
+ unsigned long long inlen,
+ const rspamd_sipkey_t k);
+
+enum rspamd_cryptobox_pbkdf_type {
+ RSPAMD_CRYPTOBOX_PBKDF2 = 0,
+ RSPAMD_CRYPTOBOX_CATENA
+};
+
+
+/**
+ * Derive key from password using the specified algorithm
+ * @param pass input password
+ * @param pass_len length of the password
+ * @param salt input salt
+ * @param salt_len length of salt
+ * @param key output key
+ * @param key_len size of the key
+ * @param complexity empiric number of complexity (rounds for pbkdf2 and garlic for catena)
+ * @return TRUE in case of success and FALSE if failed
+ */
+gboolean rspamd_cryptobox_pbkdf(const char *pass, gsize pass_len,
+ const guint8 *salt, gsize salt_len,
+ guint8 *key, gsize key_len,
+ unsigned int complexity,
+ enum rspamd_cryptobox_pbkdf_type type);
+
+
+/**
+ * Real size of rspamd cryptobox public key
+ */
+guint rspamd_cryptobox_pk_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of rspamd cryptobox signing public key
+ */
+guint rspamd_cryptobox_pk_sig_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of crypto nonce
+ */
+guint rspamd_cryptobox_nonce_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of rspamd cryptobox secret key
+ */
+guint rspamd_cryptobox_sk_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of rspamd cryptobox signing secret key
+ */
+guint rspamd_cryptobox_sk_sig_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of rspamd cryptobox shared key
+ */
+guint rspamd_cryptobox_nm_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of rspamd cryptobox MAC signature
+ */
+guint rspamd_cryptobox_mac_bytes(enum rspamd_cryptobox_mode mode);
+
+/**
+ * Real size of rspamd cryptobox digital signature
+ */
+guint rspamd_cryptobox_signature_bytes(enum rspamd_cryptobox_mode mode);
+
+/* Hash IUF interface */
+typedef crypto_generichash_blake2b_state rspamd_cryptobox_hash_state_t;
+
+/**
+ * Init cryptobox hash state using key if needed, `st` must point to the buffer
+ * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. If keylen == 0, then
+ * non-keyed hash is generated
+ */
+void rspamd_cryptobox_hash_init(rspamd_cryptobox_hash_state_t *st,
+ const guchar *key, gsize keylen);
+
+/**
+ * Update hash with data portion
+ */
+void rspamd_cryptobox_hash_update(rspamd_cryptobox_hash_state_t *st,
+ const guchar *data, gsize len);
+
+/**
+ * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length
+ */
+void rspamd_cryptobox_hash_final(rspamd_cryptobox_hash_state_t *st, guchar *out);
+
+/**
+ * One in all function
+ */
+void rspamd_cryptobox_hash(guchar *out,
+ const guchar *data,
+ gsize len,
+ const guchar *key,
+ gsize keylen);
+
+enum rspamd_cryptobox_fast_hash_type {
+ RSPAMD_CRYPTOBOX_XXHASH64 = 0,
+ RSPAMD_CRYPTOBOX_XXHASH32,
+ RSPAMD_CRYPTOBOX_XXHASH3,
+ RSPAMD_CRYPTOBOX_MUMHASH,
+ RSPAMD_CRYPTOBOX_T1HA,
+ RSPAMD_CRYPTOBOX_HASHFAST,
+ RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT
+};
+
+/* Non crypto hash IUF interface */
+typedef struct CRYPTO_ALIGN(64) rspamd_cryptobox_fast_hash_state_s {
+ guchar opaque[576]; /* Required for xxhash3 */
+ enum rspamd_cryptobox_fast_hash_type type;
+} rspamd_cryptobox_fast_hash_state_t;
+
+
+/**
+ * Creates a new cryptobox state properly aligned
+ * @return
+ */
+rspamd_cryptobox_fast_hash_state_t *rspamd_cryptobox_fast_hash_new(void);
+void rspamd_cryptobox_fast_hash_free(rspamd_cryptobox_fast_hash_state_t *st);
+
+/**
+ * Init a non-cryptographic fast hash state (default backend) with the
+ * given 64-bit seed
+ */
+void rspamd_cryptobox_fast_hash_init(rspamd_cryptobox_fast_hash_state_t *st,
+ guint64 seed);
+
+/**
+ * Init a non-cryptographic fast hash state of the specified backend type
+ * with the given 64-bit seed
+ */
+void rspamd_cryptobox_fast_hash_init_specific(rspamd_cryptobox_fast_hash_state_t *st,
+ enum rspamd_cryptobox_fast_hash_type type,
+ guint64 seed);
+
+/**
+ * Update hash with data portion
+ */
+void rspamd_cryptobox_fast_hash_update(rspamd_cryptobox_fast_hash_state_t *st,
+ const void *data, gsize len);
+
+/**
+ * Finalize the fast hash state and return the 64-bit digest
+ */
+guint64 rspamd_cryptobox_fast_hash_final(rspamd_cryptobox_fast_hash_state_t *st);
+
+/**
+ * One in all function
+ */
+guint64 rspamd_cryptobox_fast_hash(const void *data,
+ gsize len, guint64 seed);
+
+/**
+ * Platform independent version
+ */
+guint64 rspamd_cryptobox_fast_hash_specific(
+ enum rspamd_cryptobox_fast_hash_type type,
+ const void *data,
+ gsize len, guint64 seed);
+
+/**
+ * Decode base64 using platform optimized code
+ * @param in
+ * @param inlen
+ * @param out
+ * @param outlen
+ * @return
+ */
+gboolean rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen,
+ guchar *out, gsize *outlen);
+
+/**
+ * Returns TRUE if data looks like a valid base64 string
+ * @param in
+ * @param inlen
+ * @return
+ */
+gboolean rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRYPTOBOX_H_ */
diff --git a/src/libcryptobox/keypair.c b/src/libcryptobox/keypair.c
new file mode 100644
index 0000000..ec7490a
--- /dev/null
+++ b/src/libcryptobox/keypair.c
@@ -0,0 +1,1021 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "libcryptobox/keypair.h"
+#include "libcryptobox/keypair_private.h"
+#include "libutil/str_util.h"
+#include "libutil/printf.h"
+#include "contrib/libottery/ottery.h"
+
+/* Magic prefix marking encrypted keypair containers ("ruclev1") */
+const guchar encrypted_magic[7] = {'r', 'u', 'c', 'l', 'e', 'v', '1'};
+
+/* GQuark used as the GError domain for keypair operations */
+static GQuark
+rspamd_keypair_quark(void)
+{
+ return g_quark_from_static_string("rspamd-cryptobox-keypair");
+}
+
+/**
+ * Returns specific private key for different keypair types
+ * (pointer into the keypair structure; *len receives the key size:
+ * 32 for x25519/NIST KEX and NIST signing, 64 for ed25519 signing)
+ */
+static void *
+rspamd_cryptobox_keypair_sk(struct rspamd_cryptobox_keypair *kp,
+ guint *len)
+{
+ g_assert(kp != NULL);
+
+ if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+ if (kp->type == RSPAMD_KEYPAIR_KEX) {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_25519(kp)->sk;
+ }
+ else {
+ *len = 64;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_25519(kp)->sk;
+ }
+ }
+ else {
+ if (kp->type == RSPAMD_KEYPAIR_KEX) {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_NIST(kp)->sk;
+ }
+ else {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_NIST(kp)->sk;
+ }
+ }
+
+ /* Not reached */
+ return NULL;
+}
+
+/*
+ * Returns the public-key part of a keypair (pointer into the structure);
+ * *len receives the key size: 32 for all 25519 keys, 65 for NIST keys.
+ */
+static void *
+rspamd_cryptobox_keypair_pk(struct rspamd_cryptobox_keypair *kp,
+ guint *len)
+{
+ g_assert(kp != NULL);
+
+ if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+ if (kp->type == RSPAMD_KEYPAIR_KEX) {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_25519(kp)->pk;
+ }
+ else {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_25519(kp)->pk;
+ }
+ }
+ else {
+ if (kp->type == RSPAMD_KEYPAIR_KEX) {
+ *len = 65;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_NIST(kp)->pk;
+ }
+ else {
+ *len = 65;
+ return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_NIST(kp)->pk;
+ }
+ }
+
+ /* Not reached */
+ return NULL;
+}
+
+/*
+ * Returns the raw key bytes of a standalone public key (pointer into the
+ * structure); *len receives the size: 32 for 25519 keys, 65 for NIST keys.
+ */
+static void *
+rspamd_cryptobox_pubkey_pk(const struct rspamd_cryptobox_pubkey *kp,
+ guint *len)
+{
+ g_assert(kp != NULL);
+
+ if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+ if (kp->type == RSPAMD_KEYPAIR_KEX) {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_PUBKEY_25519(kp)->pk;
+ }
+ else {
+ *len = 32;
+ return RSPAMD_CRYPTOBOX_PUBKEY_SIG_25519(kp)->pk;
+ }
+ }
+ else {
+ if (kp->type == RSPAMD_KEYPAIR_KEX) {
+ *len = 65;
+ return RSPAMD_CRYPTOBOX_PUBKEY_NIST(kp)->pk;
+ }
+ else {
+ *len = 65;
+ return RSPAMD_CRYPTOBOX_PUBKEY_SIG_NIST(kp)->pk;
+ }
+ }
+
+ /* Not reached */
+ return NULL;
+}
+
+/*
+ * Allocates the concrete keypair structure for the given type/algorithm,
+ * 32-byte aligned (posix_memalign; aborts on OOM) and zero-initialized.
+ * Ownership/refcount is set up by the caller.
+ */
+static struct rspamd_cryptobox_keypair *
+rspamd_cryptobox_keypair_alloc(enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg)
+{
+ struct rspamd_cryptobox_keypair *kp;
+ guint size = 0;
+
+ if (alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+ if (type == RSPAMD_KEYPAIR_KEX) {
+ size = sizeof(struct rspamd_cryptobox_keypair_25519);
+ }
+ else {
+ size = sizeof(struct rspamd_cryptobox_keypair_sig_25519);
+ }
+ }
+ else {
+ if (type == RSPAMD_KEYPAIR_KEX) {
+ size = sizeof(struct rspamd_cryptobox_keypair_nist);
+ }
+ else {
+ size = sizeof(struct rspamd_cryptobox_keypair_sig_nist);
+ }
+ }
+
+ /* Every concrete struct must embed the generic header */
+ g_assert(size >= sizeof(*kp));
+
+ if (posix_memalign((void **) &kp, 32, size) != 0) {
+ abort();
+ }
+
+ memset(kp, 0, size);
+
+ return kp;
+}
+
+/*
+ * Allocates the concrete public-key structure for the given type/algorithm,
+ * 32-byte aligned (posix_memalign; aborts on OOM) and zero-initialized.
+ */
+static struct rspamd_cryptobox_pubkey *
+rspamd_cryptobox_pubkey_alloc(enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg)
+{
+ struct rspamd_cryptobox_pubkey *pk;
+ guint size = 0;
+
+ if (alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+ if (type == RSPAMD_KEYPAIR_KEX) {
+ size = sizeof(struct rspamd_cryptobox_pubkey_25519);
+ }
+ else {
+ size = sizeof(struct rspamd_cryptobox_pubkey_sig_25519);
+ }
+ }
+ else {
+ if (type == RSPAMD_KEYPAIR_KEX) {
+ size = sizeof(struct rspamd_cryptobox_pubkey_nist);
+ }
+ else {
+ size = sizeof(struct rspamd_cryptobox_pubkey_sig_nist);
+ }
+ }
+
+ /* Every concrete struct must embed the generic header */
+ g_assert(size >= sizeof(*pk));
+
+ if (posix_memalign((void **) &pk, 32, size) != 0) {
+ abort();
+ }
+
+ memset(pk, 0, size);
+
+ return pk;
+}
+
+
+/* Destructor for a cached shared secret: scrub the key material, then free */
+void rspamd_cryptobox_nm_dtor(struct rspamd_cryptobox_nm *nm)
+{
+ rspamd_explicit_memzero(nm->nm, sizeof(nm->nm));
+ free(nm);
+}
+
+/* Keypair destructor: scrub the secret key, drop extensions, free memory */
+void rspamd_cryptobox_keypair_dtor(struct rspamd_cryptobox_keypair *kp)
+{
+ void *sk;
+ guint len = 0;
+
+ sk = rspamd_cryptobox_keypair_sk(kp, &len);
+ g_assert(sk != NULL && len > 0);
+ rspamd_explicit_memzero(sk, len);
+
+ if (kp->extensions) {
+ ucl_object_unref(kp->extensions);
+ }
+
+ /* Not g_free as kp is aligned using posix_memalign */
+ free(kp);
+}
+
+/* Pubkey destructor: release the cached shared secret (if any), free memory */
+void rspamd_cryptobox_pubkey_dtor(struct rspamd_cryptobox_pubkey *p)
+{
+ if (p->nm) {
+ REF_RELEASE(p->nm);
+ }
+
+ /* Not g_free as p is aligned using posix_memalign */
+ free(p);
+}
+
+/*
+ * Creates a new refcounted keypair: allocates the concrete structure,
+ * generates fresh key material for the requested type (KEX or signing),
+ * and sets kp->id to the blake2b hash of the public key.
+ */
+struct rspamd_cryptobox_keypair *
+rspamd_keypair_new(enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg)
+{
+ struct rspamd_cryptobox_keypair *kp;
+ void *pk, *sk;
+ guint size;
+
+ kp = rspamd_cryptobox_keypair_alloc(type, alg);
+ kp->alg = alg;
+ kp->type = type;
+
+ /* `size` ends up as the pk size (the second call overwrites it) */
+ sk = rspamd_cryptobox_keypair_sk(kp, &size);
+ pk = rspamd_cryptobox_keypair_pk(kp, &size);
+
+ if (type == RSPAMD_KEYPAIR_KEX) {
+ rspamd_cryptobox_keypair(pk, sk, alg);
+ }
+ else {
+ rspamd_cryptobox_keypair_sig(pk, sk, alg);
+ }
+
+ /* Keypair id = unkeyed blake2b of the public key */
+ rspamd_cryptobox_hash(kp->id, pk, size, NULL, 0);
+
+ REF_INIT_RETAIN(kp, rspamd_cryptobox_keypair_dtor);
+
+ return kp;
+}
+
+
+/* Refcounting and trivial accessors for keypairs and public keys */
+
+/* Take an additional reference on a keypair */
+struct rspamd_cryptobox_keypair *
+rspamd_keypair_ref(struct rspamd_cryptobox_keypair *kp)
+{
+ REF_RETAIN(kp);
+ return kp;
+}
+
+
+/* Drop a keypair reference (dtor runs on last release) */
+void rspamd_keypair_unref(struct rspamd_cryptobox_keypair *kp)
+{
+ REF_RELEASE(kp);
+}
+
+
+/* Take an additional reference on a public key */
+struct rspamd_cryptobox_pubkey *
+rspamd_pubkey_ref(struct rspamd_cryptobox_pubkey *kp)
+{
+ REF_RETAIN(kp);
+ return kp;
+}
+
+/* Drop a public key reference (dtor runs on last release) */
+void rspamd_pubkey_unref(struct rspamd_cryptobox_pubkey *kp)
+{
+ REF_RELEASE(kp);
+}
+
+/* Keypair usage type: KEX or signing */
+enum rspamd_cryptobox_keypair_type
+rspamd_keypair_type(struct rspamd_cryptobox_keypair *kp)
+{
+ g_assert(kp != NULL);
+
+ return kp->type;
+}
+
+/* Public key usage type: KEX or signing */
+enum rspamd_cryptobox_keypair_type
+rspamd_pubkey_type(struct rspamd_cryptobox_pubkey *p)
+{
+ g_assert(p != NULL);
+
+ return p->type;
+}
+
+
+/* Keypair algorithm: 25519 or NIST */
+enum rspamd_cryptobox_mode
+rspamd_keypair_alg(struct rspamd_cryptobox_keypair *kp)
+{
+ g_assert(kp != NULL);
+
+ return kp->alg;
+}
+
+/* Public key algorithm: 25519 or NIST */
+enum rspamd_cryptobox_mode
+rspamd_pubkey_alg(struct rspamd_cryptobox_pubkey *p)
+{
+ g_assert(p != NULL);
+
+ return p->alg;
+}
+
+/*
+ * Parses a base32-encoded public key; len == 0 means NUL-terminated input.
+ * Returns NULL if decoding fails or the decoded length does not match the
+ * expected key size for type/alg; otherwise a refcounted pubkey whose id
+ * is set to the blake2b hash of the key bytes.
+ */
+struct rspamd_cryptobox_pubkey *
+rspamd_pubkey_from_base32(const gchar *b32,
+ gsize len,
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg)
+{
+ guchar *decoded;
+ gsize dlen, expected_len;
+ guint pklen;
+ struct rspamd_cryptobox_pubkey *pk;
+ guchar *pk_data;
+
+ g_assert(b32 != NULL);
+
+ if (len == 0) {
+ len = strlen(b32);
+ }
+
+ decoded = rspamd_decode_base32(b32, len, &dlen, RSPAMD_BASE32_DEFAULT);
+
+ if (decoded == NULL) {
+ return NULL;
+ }
+
+ /* KEX and signing keys have different sizes per algorithm */
+ expected_len = (type == RSPAMD_KEYPAIR_KEX) ? rspamd_cryptobox_pk_bytes(alg) : rspamd_cryptobox_pk_sig_bytes(alg);
+
+ if (dlen != expected_len) {
+ g_free(decoded);
+ return NULL;
+ }
+
+ pk = rspamd_cryptobox_pubkey_alloc(type, alg);
+ REF_INIT_RETAIN(pk, rspamd_cryptobox_pubkey_dtor);
+ pk->alg = alg;
+ pk->type = type;
+ pk_data = rspamd_cryptobox_pubkey_pk(pk, &pklen);
+
+ memcpy(pk_data, decoded, pklen);
+ g_free(decoded);
+ rspamd_cryptobox_hash(pk->id, pk_data, pklen, NULL, 0);
+
+ return pk;
+}
+
+/*
+ * Parses a hex-encoded public key; len == 0 means NUL-terminated input.
+ * Decoded length is len / 2 by construction of hex encoding.  Returns NULL
+ * on decode failure or size mismatch; otherwise a refcounted pubkey with
+ * id set to the blake2b hash of the key bytes.
+ */
+struct rspamd_cryptobox_pubkey *
+rspamd_pubkey_from_hex(const gchar *hex,
+ gsize len,
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg)
+{
+ guchar *decoded;
+ gsize dlen, expected_len;
+ guint pklen;
+ struct rspamd_cryptobox_pubkey *pk;
+ guchar *pk_data;
+
+ g_assert(hex != NULL);
+
+ if (len == 0) {
+ len = strlen(hex);
+ }
+
+ dlen = len / 2;
+
+ decoded = rspamd_decode_hex(hex, len);
+
+ if (decoded == NULL) {
+ return NULL;
+ }
+
+ /* KEX and signing keys have different sizes per algorithm */
+ expected_len = (type == RSPAMD_KEYPAIR_KEX) ? rspamd_cryptobox_pk_bytes(alg) : rspamd_cryptobox_pk_sig_bytes(alg);
+
+ if (dlen != expected_len) {
+ g_free(decoded);
+ return NULL;
+ }
+
+ pk = rspamd_cryptobox_pubkey_alloc(type, alg);
+ REF_INIT_RETAIN(pk, rspamd_cryptobox_pubkey_dtor);
+ pk->alg = alg;
+ pk->type = type;
+ pk_data = rspamd_cryptobox_pubkey_pk(pk, &pklen);
+
+ memcpy(pk_data, decoded, pklen);
+ g_free(decoded);
+ rspamd_cryptobox_hash(pk->id, pk_data, pklen, NULL, 0);
+
+ return pk;
+}
+
<parameter name="content">+/*
+ * Create a refcounted pubkey object from raw key bytes (no decoding).
+ * Returns NULL when len does not match the expected key size for the
+ * given type/algorithm.
+ */
+struct rspamd_cryptobox_pubkey *
+rspamd_pubkey_from_bin(const guchar *raw,
+ gsize len,
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg)
+{
+ gsize expected_len;
+ guint pklen;
+ struct rspamd_cryptobox_pubkey *pk;
+ guchar *pk_data;
+
+ g_assert(raw != NULL && len > 0);
+
+ expected_len = (type == RSPAMD_KEYPAIR_KEX) ? rspamd_cryptobox_pk_bytes(alg) : rspamd_cryptobox_pk_sig_bytes(alg);
+
+ if (len != expected_len) {
+ return NULL;
+ }
+
+ pk = rspamd_cryptobox_pubkey_alloc(type, alg);
+ REF_INIT_RETAIN(pk, rspamd_cryptobox_pubkey_dtor);
+ pk->alg = alg;
+ pk->type = type;
+ pk_data = rspamd_cryptobox_pubkey_pk(pk, &pklen);
+
+ memcpy(pk_data, raw, pklen);
+ /* Key id = hash of the raw public key bytes */
+ rspamd_cryptobox_hash(pk->id, pk_data, pklen, NULL, 0);
+
+ return pk;
+}
+
+
+/*
+ * Return the cached shared (NM) secret for this pubkey if it was computed
+ * against the same local keypair; the cache entry stores the first 8 bytes
+ * of the local keypair id for that check. On id mismatch the stale entry is
+ * released and NULL is returned, so the caller must recompute it via
+ * rspamd_pubkey_calculate_nm.
+ * NOTE(review): kp is dereferenced without a NULL assert — presumably all
+ * callers guarantee a valid keypair; confirm.
+ */
+const guchar *
+rspamd_pubkey_get_nm(struct rspamd_cryptobox_pubkey *p,
+ struct rspamd_cryptobox_keypair *kp)
+{
+ g_assert(p != NULL);
+
+ if (p->nm) {
+ if (memcmp(kp->id, (const guchar *) &p->nm->sk_id, sizeof(guint64)) == 0) {
+ return p->nm->nm;
+ }
+
+ /* Wrong ID, need to recalculate */
+ REF_RELEASE(p->nm);
+ p->nm = NULL;
+ }
+
+ return NULL;
+}
+
+/*
+ * Compute (and cache inside the pubkey) the shared NM secret between the
+ * remote public key p and the local keypair kp, performing ECDH for the
+ * matching algorithm. Both keys must be KEX keys of the same algorithm.
+ * Returns a pointer to the cached NM bytes.
+ */
+const guchar *
+rspamd_pubkey_calculate_nm(struct rspamd_cryptobox_pubkey *p,
+ struct rspamd_cryptobox_keypair *kp)
+{
+ g_assert(kp->alg == p->alg);
+ g_assert(kp->type == p->type);
+ g_assert(p->type == RSPAMD_KEYPAIR_KEX);
+
+ if (p->nm == NULL) {
+ /* 32-byte alignment for the NM structure; allocation failure is fatal */
+ if (posix_memalign((void **) &p->nm, 32, sizeof(*p->nm)) != 0) {
+ abort();
+ }
+
+ /* Remember which local key this NM belongs to (first 8 bytes of its id) */
+ memcpy(&p->nm->sk_id, kp->id, sizeof(guint64));
+ REF_INIT_RETAIN(p->nm, rspamd_cryptobox_nm_dtor);
+ }
+
+ if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+ struct rspamd_cryptobox_pubkey_25519 *rk_25519 =
+ RSPAMD_CRYPTOBOX_PUBKEY_25519(p);
+ struct rspamd_cryptobox_keypair_25519 *sk_25519 =
+ RSPAMD_CRYPTOBOX_KEYPAIR_25519(kp);
+
+ rspamd_cryptobox_nm(p->nm->nm, rk_25519->pk, sk_25519->sk, p->alg);
+ }
+ else {
+ struct rspamd_cryptobox_pubkey_nist *rk_nist =
+ RSPAMD_CRYPTOBOX_PUBKEY_NIST(p);
+ struct rspamd_cryptobox_keypair_nist *sk_nist =
+ RSPAMD_CRYPTOBOX_KEYPAIR_NIST(kp);
+
+ rspamd_cryptobox_nm(p->nm->nm, rk_nist->pk, sk_nist->sk, p->alg);
+ }
+
+ return p->nm->nm;
+}
+
+/* Return the raw key id (hash of the public key) of a keypair */
+const guchar *
+rspamd_keypair_get_id(struct rspamd_cryptobox_keypair *kp)
+{
+ g_assert(kp != NULL);
+
+ return kp->id;
+}
+
+/* Return the keypair's UCL extensions object, or NULL if none were loaded */
+const ucl_object_t *
+rspamd_keypair_get_extensions(struct rspamd_cryptobox_keypair *kp)
+{
+ g_assert(kp != NULL);
+
+ return kp->extensions;
+}
+
+/* Return the raw key id (hash of the public key) of a pubkey */
+const guchar *
+rspamd_pubkey_get_id(struct rspamd_cryptobox_pubkey *pk)
+{
+ g_assert(pk != NULL);
+
+ return pk->id;
+}
+
+/*
+ * Return a pointer to the raw public key bytes of a pubkey.
+ * If len is non-NULL it receives the key length in bytes.
+ */
+const guchar *
+rspamd_pubkey_get_pk(struct rspamd_cryptobox_pubkey *pk,
+ guint *len)
+{
+ guchar *ret = NULL;
+ guint rlen;
+
+ ret = rspamd_cryptobox_pubkey_pk(pk, &rlen);
+
+ if (len) {
+ *len = rlen;
+ }
+
+ return ret;
+}
+
+/*
+ * Append one key component to res, encoded according to the `how` flags:
+ * base32, hex, or raw bytes, with an optional human-readable description
+ * line and trailing newline when RSPAMD_KEYPAIR_HUMAN is set.
+ */
+static void
+rspamd_keypair_print_component(guchar *data, gsize datalen,
+ GString *res, guint how, const gchar *description)
+{
+ gint olen, b32_len;
+
+ if (how & RSPAMD_KEYPAIR_HUMAN) {
+ rspamd_printf_gstring(res, "%s: ", description);
+ }
+
+ if (how & RSPAMD_KEYPAIR_BASE32) {
+ /* Base32 expands 5 bits per character; +2 covers rounding and NUL */
+ b32_len = (datalen * 8 / 5) + 2;
+ /* Grow the buffer, then rewind len so we encode in-place at the tail */
+ g_string_set_size(res, res->len + b32_len);
+ res->len -= b32_len;
+ olen = rspamd_encode_base32_buf(data, datalen, res->str + res->len,
+ res->len + b32_len - 1, RSPAMD_BASE32_DEFAULT);
+
+ if (olen > 0) {
+ res->len += olen;
+ res->str[res->len] = '\0';
+ }
+ }
+ else if (how & RSPAMD_KEYPAIR_HEX) {
+ rspamd_printf_gstring(res, "%*xs", (gint) datalen, data);
+ }
+ else {
+ /* Raw binary output */
+ g_string_append_len(res, data, datalen);
+ }
+
+ if (how & RSPAMD_KEYPAIR_HUMAN) {
+ g_string_append_c(res, '\n');
+ }
+}
+
+/*
+ * Serialize selected components of a keypair (public key, private key,
+ * short id, full id) into a newly allocated GString, encoded per the
+ * `how` flags. Caller owns the returned string.
+ */
+GString *
+rspamd_keypair_print(struct rspamd_cryptobox_keypair *kp, guint how)
+{
+ GString *res;
+ guint len;
+ gpointer p;
+
+ g_assert(kp != NULL);
+
+ res = g_string_sized_new(63);
+
+ if ((how & RSPAMD_KEYPAIR_PUBKEY)) {
+ p = rspamd_cryptobox_keypair_pk(kp, &len);
+ rspamd_keypair_print_component(p, len, res, how, "Public key");
+ }
+ if ((how & RSPAMD_KEYPAIR_PRIVKEY)) {
+ p = rspamd_cryptobox_keypair_sk(kp, &len);
+ rspamd_keypair_print_component(p, len, res, how, "Private key");
+ }
+ if ((how & RSPAMD_KEYPAIR_ID_SHORT)) {
+ rspamd_keypair_print_component(kp->id, RSPAMD_KEYPAIR_SHORT_ID_LEN,
+ res, how, "Short key ID");
+ }
+ if ((how & RSPAMD_KEYPAIR_ID)) {
+ rspamd_keypair_print_component(kp->id, sizeof(kp->id), res, how, "Key ID");
+ }
+
+ return res;
+}
+
+/*
+ * Serialize selected components of a pubkey (public key, short id, full id)
+ * into a newly allocated GString, encoded per the `how` flags.
+ * Caller owns the returned string.
+ */
+GString *
+rspamd_pubkey_print(struct rspamd_cryptobox_pubkey *pk, guint how)
+{
+ GString *res;
+ guint len;
+ gpointer p;
+
+ g_assert(pk != NULL);
+
+ res = g_string_sized_new(63);
+
+ if ((how & RSPAMD_KEYPAIR_PUBKEY)) {
+ p = rspamd_cryptobox_pubkey_pk(pk, &len);
+ rspamd_keypair_print_component(p, len, res, how, "Public key");
+ }
+ if ((how & RSPAMD_KEYPAIR_ID_SHORT)) {
+ rspamd_keypair_print_component(pk->id, RSPAMD_KEYPAIR_SHORT_ID_LEN,
+ res, how, "Short key ID");
+ }
+ if ((how & RSPAMD_KEYPAIR_ID)) {
+ rspamd_keypair_print_component(pk->id, sizeof(pk->id), res, how,
+ "Key ID");
+ }
+
+ return res;
+}
+
+/*
+ * Return a pointer to a raw keypair component selected by ncomp
+ * (RSPAMD_KEYPAIR_COMPONENT_ID/PK/SK). If len is non-NULL it receives the
+ * component length; an unknown ncomp yields NULL and length 0.
+ */
+const guchar *
+rspamd_keypair_component(struct rspamd_cryptobox_keypair *kp,
+ guint ncomp, guint *len)
+{
+ guint rlen = 0;
+ const guchar *ret = NULL;
+
+ g_assert(kp != NULL);
+
+ switch (ncomp) {
+ case RSPAMD_KEYPAIR_COMPONENT_ID:
+ rlen = sizeof(kp->id);
+ ret = kp->id;
+ break;
+ case RSPAMD_KEYPAIR_COMPONENT_PK:
+ ret = rspamd_cryptobox_keypair_pk(kp, &rlen);
+ break;
+ case RSPAMD_KEYPAIR_COMPONENT_SK:
+ ret = rspamd_cryptobox_keypair_sk(kp, &rlen);
+ break;
+ }
+
+ if (len) {
+ *len = rlen;
+ }
+
+ return ret;
+}
+
+/*
+ * Reconstruct a keypair from a UCL object of the shape produced by
+ * rspamd_keypair_to_ucl: requires "pubkey" and "privkey" strings; honours
+ * optional "type" (kex/sign), "algorithm" (curve25519/nistp256), "encoding"
+ * (hex; base32 is the default) and "extensions" fields. Returns a new
+ * refcounted keypair or NULL on any validation/decoding failure.
+ */
+struct rspamd_cryptobox_keypair *
+rspamd_keypair_from_ucl(const ucl_object_t *obj)
+{
+ const ucl_object_t *privkey, *pubkey, *elt;
+ const gchar *str;
+ enum rspamd_cryptobox_keypair_type type = RSPAMD_KEYPAIR_KEX;
+ enum rspamd_cryptobox_mode mode = RSPAMD_CRYPTOBOX_MODE_25519;
+ gboolean is_hex = FALSE;
+ struct rspamd_cryptobox_keypair *kp;
+ guint len;
+ gsize ucl_len;
+ gint dec_len;
+ gpointer target;
+
+ if (ucl_object_type(obj) != UCL_OBJECT) {
+ return NULL;
+ }
+
+ /* Accept both flattened objects and ones wrapped in a "keypair" key */
+ elt = ucl_object_lookup(obj, "keypair");
+ if (elt != NULL) {
+ obj = elt;
+ }
+
+ pubkey = ucl_object_lookup_any(obj, "pubkey", "public", "public_key",
+ NULL);
+ if (pubkey == NULL || ucl_object_type(pubkey) != UCL_STRING) {
+ return NULL;
+ }
+
+ privkey = ucl_object_lookup_any(obj, "privkey", "private", "private_key",
+ "secret", "secret_key", NULL);
+ if (privkey == NULL || ucl_object_type(privkey) != UCL_STRING) {
+ return NULL;
+ }
+
+ /* Optional fields */
+ elt = ucl_object_lookup(obj, "type");
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ str = ucl_object_tostring(elt);
+
+ if (g_ascii_strcasecmp(str, "kex") == 0) {
+ type = RSPAMD_KEYPAIR_KEX;
+ }
+ else if (g_ascii_strcasecmp(str, "sign") == 0) {
+ type = RSPAMD_KEYPAIR_SIGN;
+ }
+ /* TODO: handle errors */
+ }
+
+ elt = ucl_object_lookup(obj, "algorithm");
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ str = ucl_object_tostring(elt);
+
+ if (g_ascii_strcasecmp(str, "curve25519") == 0) {
+ mode = RSPAMD_CRYPTOBOX_MODE_25519;
+ }
+ else if (g_ascii_strcasecmp(str, "nistp256") == 0) {
+ mode = RSPAMD_CRYPTOBOX_MODE_NIST;
+ }
+ /* TODO: handle errors */
+ }
+
+ elt = ucl_object_lookup(obj, "encoding");
+ if (elt && ucl_object_type(elt) == UCL_STRING) {
+ str = ucl_object_tostring(elt);
+
+ if (g_ascii_strcasecmp(str, "hex") == 0) {
+ is_hex = TRUE;
+ }
+ /* TODO: handle errors */
+ }
+
+ kp = rspamd_cryptobox_keypair_alloc(type, mode);
+ kp->type = type;
+ kp->alg = mode;
+ REF_INIT_RETAIN(kp, rspamd_cryptobox_keypair_dtor);
+ g_assert(kp != NULL);
+
+ /* Decode the secret key directly into the keypair storage */
+ target = rspamd_cryptobox_keypair_sk(kp, &len);
+ str = ucl_object_tolstring(privkey, &ucl_len);
+
+ if (is_hex) {
+ dec_len = rspamd_decode_hex_buf(str, ucl_len, target, len);
+ }
+ else {
+ dec_len = rspamd_decode_base32_buf(str, ucl_len, target, len, RSPAMD_BASE32_DEFAULT);
+ }
+
+ if (dec_len != (gint) len) {
+ rspamd_keypair_unref(kp);
+
+ return NULL;
+ }
+
+ /* Decode the public key the same way */
+ target = rspamd_cryptobox_keypair_pk(kp, &len);
+ str = ucl_object_tolstring(pubkey, &ucl_len);
+
+ if (is_hex) {
+ dec_len = rspamd_decode_hex_buf(str, ucl_len, target, len);
+ }
+ else {
+ dec_len = rspamd_decode_base32_buf(str, ucl_len, target, len, RSPAMD_BASE32_DEFAULT);
+ }
+
+ if (dec_len != (gint) len) {
+ rspamd_keypair_unref(kp);
+
+ return NULL;
+ }
+
+ /* Key id = hash of the public key just decoded into target */
+ rspamd_cryptobox_hash(kp->id, target, len, NULL, 0);
+
+ elt = ucl_object_lookup(obj, "extensions");
+ if (elt && ucl_object_type(elt) == UCL_OBJECT) {
+ /* Use copy to avoid issues with the refcounts */
+ kp->extensions = ucl_object_copy(elt);
+ }
+
+ return kp;
+}
+
+/*
+ * Serialize a keypair into a UCL object with "pubkey", "privkey" (unless
+ * RSPAMD_KEYPAIR_DUMP_NO_SECRET), "id", "encoding", "algorithm" and "type"
+ * keys, optionally wrapped under a "keypair" key when not flattened.
+ * Keys are encoded in hex or base32 per the dump flags.
+ */
+ucl_object_t *
+rspamd_keypair_to_ucl(struct rspamd_cryptobox_keypair *kp,
+ enum rspamd_keypair_dump_flags flags)
+{
+ ucl_object_t *ucl_out, *elt;
+ gint how = 0;
+ GString *keypair_out;
+ const gchar *encoding;
+
+ g_assert(kp != NULL);
+
+ if (flags & RSPAMD_KEYPAIR_DUMP_HEX) {
+ how |= RSPAMD_KEYPAIR_HEX;
+ encoding = "hex";
+ }
+ else {
+ how |= RSPAMD_KEYPAIR_BASE32;
+ encoding = "base32";
+ }
+
+ if (flags & RSPAMD_KEYPAIR_DUMP_FLATTENED) {
+ ucl_out = ucl_object_typed_new(UCL_OBJECT);
+ elt = ucl_out;
+ }
+ else {
+ /* Nest all fields under a "keypair" child object */
+ ucl_out = ucl_object_typed_new(UCL_OBJECT);
+ elt = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(ucl_out, elt, "keypair", 0, false);
+ }
+
+
+ /* pubkey part */
+ keypair_out = rspamd_keypair_print(kp,
+ RSPAMD_KEYPAIR_PUBKEY | how);
+ ucl_object_insert_key(elt,
+ ucl_object_fromlstring(keypair_out->str, keypair_out->len),
+ "pubkey", 0, false);
+ g_string_free(keypair_out, TRUE);
+
+ if (!(flags & RSPAMD_KEYPAIR_DUMP_NO_SECRET)) {
+ /* privkey part */
+ keypair_out = rspamd_keypair_print(kp,
+ RSPAMD_KEYPAIR_PRIVKEY | how);
+ ucl_object_insert_key(elt,
+ ucl_object_fromlstring(keypair_out->str, keypair_out->len),
+ "privkey", 0, false);
+ g_string_free(keypair_out, TRUE);
+ }
+
+ keypair_out = rspamd_keypair_print(kp,
+ RSPAMD_KEYPAIR_ID | how);
+ ucl_object_insert_key(elt,
+ ucl_object_fromlstring(keypair_out->str, keypair_out->len),
+ "id", 0, false);
+ g_string_free(keypair_out, TRUE);
+
+ ucl_object_insert_key(elt,
+ ucl_object_fromstring(encoding),
+ "encoding", 0, false);
+
+ ucl_object_insert_key(elt,
+ ucl_object_fromstring(
+ kp->alg == RSPAMD_CRYPTOBOX_MODE_NIST ? "nistp256" : "curve25519"),
+ "algorithm", 0, false);
+
+ ucl_object_insert_key(elt,
+ ucl_object_fromstring(
+ kp->type == RSPAMD_KEYPAIR_KEX ? "kex" : "sign"),
+ "type", 0, false);
+
+ if (kp->extensions) {
+ ucl_object_insert_key(elt, ucl_object_copy(kp->extensions),
+ "extensions", 0, false);
+ }
+
+ return ucl_out;
+}
+
+/*
+ * Decrypt a message produced by rspamd_keypair_encrypt/rspamd_pubkey_encrypt.
+ * Expected wire layout: magic | ephemeral pubkey | MAC | nonce | ciphertext.
+ * On success allocates *out (g_malloc, caller frees) with the plaintext and
+ * sets *outlen if non-NULL; on failure sets err and returns FALSE.
+ */
+gboolean
+rspamd_keypair_decrypt(struct rspamd_cryptobox_keypair *kp,
+ const guchar *in, gsize inlen,
+ guchar **out, gsize *outlen,
+ GError **err)
+{
+ const guchar *nonce, *mac, *data, *pubkey;
+
+ g_assert(kp != NULL);
+ g_assert(in != NULL);
+
+ /* Only KEX keypairs can decrypt */
+ if (kp->type != RSPAMD_KEYPAIR_KEX) {
+ g_set_error(err, rspamd_keypair_quark(), EINVAL,
+ "invalid keypair type");
+
+ return FALSE;
+ }
+
+ /* Input must at least hold the fixed-size header fields */
+ if (inlen < sizeof(encrypted_magic) + rspamd_cryptobox_pk_bytes(kp->alg) +
+ rspamd_cryptobox_mac_bytes(kp->alg) +
+ rspamd_cryptobox_nonce_bytes(kp->alg)) {
+ g_set_error(err, rspamd_keypair_quark(), E2BIG, "invalid size: too small");
+
+ return FALSE;
+ }
+
+ if (memcmp(in, encrypted_magic, sizeof(encrypted_magic)) != 0) {
+ g_set_error(err, rspamd_keypair_quark(), EINVAL,
+ "invalid magic");
+
+ return FALSE;
+ }
+
+ /* Set pointers */
+ pubkey = in + sizeof(encrypted_magic);
+ mac = pubkey + rspamd_cryptobox_pk_bytes(kp->alg);
+ nonce = mac + rspamd_cryptobox_mac_bytes(kp->alg);
+ data = nonce + rspamd_cryptobox_nonce_bytes(kp->alg);
+
+ /* Require at least one byte of ciphertext after the header */
+ if (data - in >= inlen) {
+ g_set_error(err, rspamd_keypair_quark(), E2BIG, "invalid size: too small");
+
+ return FALSE;
+ }
+
+ inlen -= data - in;
+
+ /* Allocate memory for output */
+ *out = g_malloc(inlen);
+ memcpy(*out, data, inlen);
+
+ /* Decrypt in-place in the output buffer; verifies the MAC */
+ if (!rspamd_cryptobox_decrypt_inplace(*out, inlen, nonce, pubkey,
+ rspamd_keypair_component(kp, RSPAMD_KEYPAIR_COMPONENT_SK, NULL),
+ mac, kp->alg)) {
+ g_set_error(err, rspamd_keypair_quark(), EPERM, "verification failed");
+ g_free(*out);
+
+ return FALSE;
+ }
+
+ if (outlen) {
+ *outlen = inlen;
+ }
+
+ return TRUE;
+}
+
+/*
+ * Encrypt a message to the public part of keypair kp using a freshly
+ * generated ephemeral local keypair. Output layout:
+ * magic | ephemeral pubkey | MAC | nonce | ciphertext.
+ * Allocates *out (g_malloc, caller frees), sets *outlen if non-NULL.
+ * Returns FALSE (with err set) only for a non-KEX keypair.
+ */
+gboolean
+rspamd_keypair_encrypt(struct rspamd_cryptobox_keypair *kp,
+ const guchar *in, gsize inlen,
+ guchar **out, gsize *outlen,
+ GError **err)
+{
+ guchar *nonce, *mac, *data, *pubkey;
+ struct rspamd_cryptobox_keypair *local;
+ gsize olen;
+
+ g_assert(kp != NULL);
+ g_assert(in != NULL);
+
+ if (kp->type != RSPAMD_KEYPAIR_KEX) {
+ g_set_error(err, rspamd_keypair_quark(), EINVAL,
+ "invalid keypair type");
+
+ return FALSE;
+ }
+
+ /* Ephemeral keypair; its secret key is used once and then dropped */
+ local = rspamd_keypair_new(kp->type, kp->alg);
+
+ olen = inlen + sizeof(encrypted_magic) +
+ rspamd_cryptobox_pk_bytes(kp->alg) +
+ rspamd_cryptobox_mac_bytes(kp->alg) +
+ rspamd_cryptobox_nonce_bytes(kp->alg);
+ *out = g_malloc(olen);
+ memcpy(*out, encrypted_magic, sizeof(encrypted_magic));
+ pubkey = *out + sizeof(encrypted_magic);
+ mac = pubkey + rspamd_cryptobox_pk_bytes(kp->alg);
+ nonce = mac + rspamd_cryptobox_mac_bytes(kp->alg);
+ data = nonce + rspamd_cryptobox_nonce_bytes(kp->alg);
+
+ /* Random nonce, then plaintext is copied and encrypted in-place */
+ ottery_rand_bytes(nonce, rspamd_cryptobox_nonce_bytes(kp->alg));
+ memcpy(data, in, inlen);
+ memcpy(pubkey, rspamd_keypair_component(kp, RSPAMD_KEYPAIR_COMPONENT_PK, NULL),
+ rspamd_cryptobox_pk_bytes(kp->alg));
+ rspamd_cryptobox_encrypt_inplace(data, inlen, nonce, pubkey,
+ rspamd_keypair_component(local, RSPAMD_KEYPAIR_COMPONENT_SK, NULL),
+ mac, kp->alg);
+ rspamd_keypair_unref(local);
+
+ if (outlen) {
+ *outlen = olen;
+ }
+
+ return TRUE;
+}
+
+/*
+ * Encrypt a message to a remote public key using a freshly generated
+ * ephemeral local keypair; same wire layout as rspamd_keypair_encrypt:
+ * magic | ephemeral pubkey | MAC | nonce | ciphertext.
+ * Allocates *out (g_malloc, caller frees), sets *outlen if non-NULL.
+ * Returns FALSE (with err set) only for a non-KEX pubkey.
+ */
+gboolean
+rspamd_pubkey_encrypt(struct rspamd_cryptobox_pubkey *pk,
+ const guchar *in, gsize inlen,
+ guchar **out, gsize *outlen,
+ GError **err)
+{
+ guchar *nonce, *mac, *data, *pubkey;
+ struct rspamd_cryptobox_keypair *local;
+ gsize olen;
+
+ g_assert(pk != NULL);
+ g_assert(in != NULL);
+
+ if (pk->type != RSPAMD_KEYPAIR_KEX) {
+ g_set_error(err, rspamd_keypair_quark(), EINVAL,
+ "invalid pubkey type");
+
+ return FALSE;
+ }
+
+ /* Ephemeral keypair; its secret key is used once and then dropped */
+ local = rspamd_keypair_new(pk->type, pk->alg);
+
+ olen = inlen + sizeof(encrypted_magic) +
+ rspamd_cryptobox_pk_bytes(pk->alg) +
+ rspamd_cryptobox_mac_bytes(pk->alg) +
+ rspamd_cryptobox_nonce_bytes(pk->alg);
+ *out = g_malloc(olen);
+ memcpy(*out, encrypted_magic, sizeof(encrypted_magic));
+ pubkey = *out + sizeof(encrypted_magic);
+ mac = pubkey + rspamd_cryptobox_pk_bytes(pk->alg);
+ nonce = mac + rspamd_cryptobox_mac_bytes(pk->alg);
+ data = nonce + rspamd_cryptobox_nonce_bytes(pk->alg);
+
+ /* Random nonce, then plaintext is copied and encrypted in-place */
+ ottery_rand_bytes(nonce, rspamd_cryptobox_nonce_bytes(pk->alg));
+ memcpy(data, in, inlen);
+ memcpy(pubkey, rspamd_pubkey_get_pk(pk, NULL),
+ rspamd_cryptobox_pk_bytes(pk->alg));
+ rspamd_cryptobox_encrypt_inplace(data, inlen, nonce, pubkey,
+ rspamd_keypair_component(local, RSPAMD_KEYPAIR_COMPONENT_SK, NULL),
+ mac, pk->alg);
+ rspamd_keypair_unref(local);
+
+ if (outlen) {
+ *outlen = olen;
+ }
+
+ return TRUE;
+} \ No newline at end of file
diff --git a/src/libcryptobox/keypair.h b/src/libcryptobox/keypair.h
new file mode 100644
index 0000000..64461b7
--- /dev/null
+++ b/src/libcryptobox/keypair.h
@@ -0,0 +1,317 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBCRYPTOBOX_KEYPAIR_H_
+#define SRC_LIBCRYPTOBOX_KEYPAIR_H_
+
+#include "config.h"
+#include "cryptobox.h"
+#include "ucl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Keypair type
+ */
+enum rspamd_cryptobox_keypair_type {
+ RSPAMD_KEYPAIR_KEX = 0,
+ RSPAMD_KEYPAIR_SIGN
+};
+
+extern const guchar encrypted_magic[7];
+
+/**
+ * Opaque structure for the full (public + private) keypair
+ */
+struct rspamd_cryptobox_keypair;
+/**
+ * Opaque structure for public only keypair
+ */
+struct rspamd_cryptobox_pubkey;
+
+/**
+ * Creates new full keypair
+ * @param type type of the keypair
+ * @param alg algorithm for the keypair
+ * @return fresh keypair generated
+ */
+struct rspamd_cryptobox_keypair *rspamd_keypair_new(
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg);
+
+/**
+ * Increase refcount for the specific keypair
+ * @param kp
+ * @return
+ */
+struct rspamd_cryptobox_keypair *rspamd_keypair_ref(
+ struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Decrease refcount for the specific keypair (or destroy when refcount == 0)
+ * @param kp
+ */
+void rspamd_keypair_unref(struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Increase refcount for the specific pubkey
+ * @param kp
+ * @return
+ */
+struct rspamd_cryptobox_pubkey *rspamd_pubkey_ref(
+ struct rspamd_cryptobox_pubkey *kp);
+
+/**
+ * Load pubkey from base32 string
+ * @param b32 input string
+ * @param type type of key (signing or kex)
+ * @param alg algorithm of the key (nist or curve25519)
+ * @return new pubkey or NULL in case of error
+ */
+struct rspamd_cryptobox_pubkey *rspamd_pubkey_from_base32(const gchar *b32,
+ gsize len,
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg);
+
+/**
+ * Load pubkey from hex string
+ * @param hex input string
+ * @param type type of key (signing or kex)
+ * @param alg algorithm of the key (nist or curve25519)
+ * @return new pubkey or NULL in case of error
+ */
+struct rspamd_cryptobox_pubkey *rspamd_pubkey_from_hex(const gchar *hex,
+ gsize len,
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg);
+
+/**
+ * Load pubkey from raw chunk string
+ * @param raw input data
+ * @param type type of key (signing or kex)
+ * @param alg algorithm of the key (nist or curve25519)
+ * @return new pubkey or NULL in case of error
+ */
+struct rspamd_cryptobox_pubkey *rspamd_pubkey_from_bin(const guchar *raw,
+ gsize len,
+ enum rspamd_cryptobox_keypair_type type,
+ enum rspamd_cryptobox_mode alg);
+
+
+/**
+ * Decrease refcount for the specific pubkey (or destroy when refcount == 0)
+ * @param kp
+ */
+void rspamd_pubkey_unref(struct rspamd_cryptobox_pubkey *kp);
+
+/**
+ * Get type of keypair
+ */
+enum rspamd_cryptobox_keypair_type rspamd_keypair_type(
+ struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Get type of pubkey
+ */
+enum rspamd_cryptobox_keypair_type rspamd_pubkey_type(
+ struct rspamd_cryptobox_pubkey *p);
+
+/**
+ * Get algorithm of keypair
+ */
+enum rspamd_cryptobox_mode rspamd_keypair_alg(struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Get algorithm of pubkey
+ */
+enum rspamd_cryptobox_mode rspamd_pubkey_alg(struct rspamd_cryptobox_pubkey *p);
+
+/**
+ * Get cached NM for this specific pubkey
+ * @param p
+ * @return
+ */
+const guchar *rspamd_pubkey_get_nm(struct rspamd_cryptobox_pubkey *p,
+ struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Calculate and store nm value for the specified local key (performs ECDH)
+ * @param p
+ * @return
+ */
+const guchar *rspamd_pubkey_calculate_nm(struct rspamd_cryptobox_pubkey *p,
+ struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Get raw public key id for a specified keypair (rspamd_cryptobox_HASHBYTES)
+ * @param kp
+ * @return
+ */
+const guchar *rspamd_keypair_get_id(struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Returns keypair extensions if any
+ * @param kp
+ * @return
+ */
+const ucl_object_t *rspamd_keypair_get_extensions(struct rspamd_cryptobox_keypair *kp);
+
+/**
+ * Get raw public key id for a specified key (rspamd_cryptobox_HASHBYTES)
+ * @param kp
+ * @return
+ */
+const guchar *rspamd_pubkey_get_id(struct rspamd_cryptobox_pubkey *pk);
+
+/**
+ * Get raw public key from pubkey opaque structure
+ * @param pk
+ * @param len
+ * @return
+ */
+const guchar *rspamd_pubkey_get_pk(struct rspamd_cryptobox_pubkey *pk,
+ guint *len);
+
+/** Short ID characters count */
+#define RSPAMD_KEYPAIR_SHORT_ID_LEN 5
+/** Print pubkey */
+#define RSPAMD_KEYPAIR_PUBKEY 0x1
+/** Print secret key */
+#define RSPAMD_KEYPAIR_PRIVKEY 0x2
+/** Print key id */
+#define RSPAMD_KEYPAIR_ID 0x4
+/** Print short key id */
+#define RSPAMD_KEYPAIR_ID_SHORT 0x8
+/** Encode output with base 32 */
+#define RSPAMD_KEYPAIR_BASE32 0x10
+/** Human readable output */
+#define RSPAMD_KEYPAIR_HUMAN 0x20
+#define RSPAMD_KEYPAIR_HEX 0x40
+
+/**
+ * Print keypair encoding it if needed
+ * @param key key to print
+ * @param how flags that specifies printing behaviour
+ * @return newly allocated string with keypair
+ */
+GString *rspamd_keypair_print(struct rspamd_cryptobox_keypair *kp,
+ guint how);
+
+/**
+ * Print pubkey encoding it if needed
+ * @param key key to print
+ * @param how flags that specifies printing behaviour
+ * @return newly allocated string with keypair
+ */
+GString *rspamd_pubkey_print(struct rspamd_cryptobox_pubkey *pk,
+ guint how);
+
+/** Get keypair pubkey ID */
+#define RSPAMD_KEYPAIR_COMPONENT_ID 0
+/** Get keypair public key */
+#define RSPAMD_KEYPAIR_COMPONENT_PK 1
+/** Get keypair private key */
+#define RSPAMD_KEYPAIR_COMPONENT_SK 2
+
+/**
+ * Get specific component of a keypair
+ * @param kp keypair
+ * @param ncomp component number
+ * @param len length of input
+ * @return raw content of the component
+ */
+const guchar *rspamd_keypair_component(struct rspamd_cryptobox_keypair *kp,
+ guint ncomp, guint *len);
+
+/**
+ * Create a new keypair from ucl object
+ * @param obj object to load
+ * @return new structure or NULL if an object is invalid
+ */
+struct rspamd_cryptobox_keypair *rspamd_keypair_from_ucl(const ucl_object_t *obj);
+
+
+enum rspamd_keypair_dump_flags {
+ RSPAMD_KEYPAIR_DUMP_DEFAULT = 0,
+ RSPAMD_KEYPAIR_DUMP_HEX = 1u << 0u,
+ RSPAMD_KEYPAIR_DUMP_NO_SECRET = 1u << 1u,
+ RSPAMD_KEYPAIR_DUMP_FLATTENED = 1u << 2u,
+};
+
+/**
+ * Converts keypair to ucl object
+ * @param kp
+ * @return
+ */
+ucl_object_t *rspamd_keypair_to_ucl(struct rspamd_cryptobox_keypair *kp,
+ enum rspamd_keypair_dump_flags flags);
+
+
+/**
+ * Decrypts data using keypair and a pubkey stored in in, in must start from
+ * `encrypted_magic` constant
+ * @param kp keypair
+ * @param in raw input
+ * @param inlen input length
+ * @param out output (allocated internally using g_malloc)
+ * @param outlen output size
+ * @return TRUE if decryption is completed, out must be freed in this case
+ */
+gboolean rspamd_keypair_decrypt(struct rspamd_cryptobox_keypair *kp,
+ const guchar *in, gsize inlen,
+ guchar **out, gsize *outlen,
+ GError **err);
+
+/**
+ * Encrypts data using a specific keypair.
+ * This method actually generates ephemeral local keypair, use public key from
+ * the remote keypair and encrypts data
+ * @param kp keypair
+ * @param in raw input
+ * @param inlen input length
+ * @param out output (allocated internally using g_malloc)
+ * @param outlen output size
+ * @param err pointer to error
+ * @return TRUE if encryption has been completed, out must be freed in this case
+ */
+gboolean rspamd_keypair_encrypt(struct rspamd_cryptobox_keypair *kp,
+ const guchar *in, gsize inlen,
+ guchar **out, gsize *outlen,
+ GError **err);
+
+/**
+ * Encrypts data using a specific pubkey (must have KEX type).
+ * This method actually generates ephemeral local keypair, use public key from
+ * the remote keypair and encrypts data
+ * @param kp keypair
+ * @param in raw input
+ * @param inlen input length
+ * @param out output (allocated internally using g_malloc)
+ * @param outlen output size
+ * @param err pointer to error
+ * @return TRUE if encryption has been completed, out must be freed in this case
+ */
+gboolean rspamd_pubkey_encrypt(struct rspamd_cryptobox_pubkey *pk,
+ const guchar *in, gsize inlen,
+ guchar **out, gsize *outlen,
+ GError **err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBCRYPTOBOX_KEYPAIR_H_ */
diff --git a/src/libcryptobox/keypair_private.h b/src/libcryptobox/keypair_private.h
new file mode 100644
index 0000000..16e17e0
--- /dev/null
+++ b/src/libcryptobox/keypair_private.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KEYPAIR_PRIVATE_H_
+#define KEYPAIR_PRIVATE_H_
+
+#include "config.h"
+#include "ref.h"
+#include "cryptobox.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * KEX cached data
+ */
+struct rspamd_cryptobox_nm {
+ guchar nm[rspamd_cryptobox_MAX_NMBYTES];
+ guint64 sk_id; /* Used to store secret key id */
+ ref_entry_t ref;
+};
+
+/*
+ * Generic keypair
+ */
+struct rspamd_cryptobox_keypair {
+ guchar id[rspamd_cryptobox_HASHBYTES];
+ enum rspamd_cryptobox_keypair_type type;
+ enum rspamd_cryptobox_mode alg;
+ ucl_object_t *extensions;
+ ref_entry_t ref;
+};
+
+/*
+ * NIST p256 ecdh keypair
+ */
+#define RSPAMD_CRYPTOBOX_KEYPAIR_NIST(x) ((struct rspamd_cryptobox_keypair_nist *) (x))
+struct rspamd_cryptobox_keypair_nist {
+ struct rspamd_cryptobox_keypair parent;
+ guchar sk[32];
+ guchar pk[65];
+};
+
+/*
+ * Curve25519 ecdh keypair
+ */
+#define RSPAMD_CRYPTOBOX_KEYPAIR_25519(x) ((struct rspamd_cryptobox_keypair_25519 *) (x))
+struct rspamd_cryptobox_keypair_25519 {
+ struct rspamd_cryptobox_keypair parent;
+ guchar sk[32];
+ guchar pk[32];
+};
+
+/*
+ * NIST p256 ecdsa keypair
+ */
+#define RSPAMD_CRYPTOBOX_KEYPAIR_SIG_NIST(x) ((struct rspamd_cryptobox_keypair_sig_nist *) (x))
+struct rspamd_cryptobox_keypair_sig_nist {
+ struct rspamd_cryptobox_keypair parent;
+ guchar sk[32];
+ guchar pk[65];
+};
+
+/*
+ * Ed25519 keypair
+ */
+#define RSPAMD_CRYPTOBOX_KEYPAIR_SIG_25519(x) ((struct rspamd_cryptobox_keypair_sig_25519 *) (x))
+struct rspamd_cryptobox_keypair_sig_25519 {
+ struct rspamd_cryptobox_keypair parent;
+ guchar sk[64];
+ guchar pk[32];
+};
+
+/*
+ * Public component of the keypair
+ */
+struct rspamd_cryptobox_pubkey {
+ guchar id[rspamd_cryptobox_HASHBYTES];
+ struct rspamd_cryptobox_nm *nm;
+ enum rspamd_cryptobox_keypair_type type;
+ enum rspamd_cryptobox_mode alg;
+ ref_entry_t ref;
+};
+
+/*
+ * Public p256 ecdh
+ */
+#define RSPAMD_CRYPTOBOX_PUBKEY_NIST(x) ((struct rspamd_cryptobox_pubkey_nist *) (x))
+struct rspamd_cryptobox_pubkey_nist {
+ struct rspamd_cryptobox_pubkey parent;
+ guchar pk[65];
+};
+
+/*
+ * Public curve25519 ecdh
+ */
+#define RSPAMD_CRYPTOBOX_PUBKEY_25519(x) ((struct rspamd_cryptobox_pubkey_25519 *) (x))
+struct rspamd_cryptobox_pubkey_25519 {
+ struct rspamd_cryptobox_pubkey parent;
+ guchar pk[32];
+};
+
+/*
+ * Public p256 ecdsa
+ */
+#define RSPAMD_CRYPTOBOX_PUBKEY_SIG_NIST(x) ((struct rspamd_cryptobox_pubkey_sig_nist *) (x))
+struct rspamd_cryptobox_pubkey_sig_nist {
+ struct rspamd_cryptobox_pubkey parent;
+ guchar pk[65];
+};
+
+/*
+ * Public ed25519
+ */
+#define RSPAMD_CRYPTOBOX_PUBKEY_SIG_25519(x) ((struct rspamd_cryptobox_pubkey_sig_25519 *) (x))
+struct rspamd_cryptobox_pubkey_sig_25519 {
+ struct rspamd_cryptobox_pubkey parent;
+ guchar pk[32];
+};
+
+void rspamd_cryptobox_nm_dtor(struct rspamd_cryptobox_nm *nm);
+
+void rspamd_cryptobox_keypair_dtor(struct rspamd_cryptobox_keypair *kp);
+
+void rspamd_cryptobox_pubkey_dtor(struct rspamd_cryptobox_pubkey *p);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* KEYPAIR_PRIVATE_H_ */
diff --git a/src/libcryptobox/keypairs_cache.c b/src/libcryptobox/keypairs_cache.c
new file mode 100644
index 0000000..0616bb9
--- /dev/null
+++ b/src/libcryptobox/keypairs_cache.c
@@ -0,0 +1,141 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+#include "keypairs_cache.h"
+#include "keypair_private.h"
+#include "libutil/util.h"
+#include "hash.h"
+
+struct rspamd_keypair_elt {
+ struct rspamd_cryptobox_nm *nm;
+ guchar pair[rspamd_cryptobox_HASHBYTES * 2];
+};
+
+struct rspamd_keypair_cache {
+ rspamd_lru_hash_t *hash;
+};
+
+/* LRU hash value destructor: release the shared NM ref and free the element */
+static void
+rspamd_keypair_destroy(gpointer ptr)
+{
+	struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *) ptr;
+
+	REF_RELEASE(elt->nm);
+	g_free(elt);
+}
+
+/* Hash a cache element by its concatenated (remote id, local id) pair */
+static guint
+rspamd_keypair_hash(gconstpointer ptr)
+{
+	struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *) ptr;
+
+	return rspamd_cryptobox_fast_hash(elt->pair, sizeof(elt->pair),
+									  rspamd_hash_seed());
+}
+
+/* Equality for cache elements: byte-compare the (remote id, local id) pair */
+static gboolean
+rspamd_keypair_equal(gconstpointer p1, gconstpointer p2)
+{
+	struct rspamd_keypair_elt *e1 = (struct rspamd_keypair_elt *) p1,
+							  *e2 = (struct rspamd_keypair_elt *) p2;
+
+	return memcmp(e1->pair, e2->pair, sizeof(e1->pair)) == 0;
+}
+
+/*
+ * Allocate a keypair cache backed by an LRU hash of at most max_items
+ * precomputed NM entries. max_items must be positive.
+ */
+struct rspamd_keypair_cache *
+rspamd_keypair_cache_new(guint max_items)
+{
+	struct rspamd_keypair_cache *c;
+
+	g_assert(max_items > 0);
+
+	c = g_malloc0(sizeof(*c));
+	c->hash = rspamd_lru_hash_new_full(max_items, NULL,
+									   rspamd_keypair_destroy, rspamd_keypair_hash, rspamd_keypair_equal);
+
+	return c;
+}
+
+/*
+ * Attach a cached shared (NM) secret for the (local keypair, remote pubkey)
+ * combination to rk->nm, computing the ECDH result and inserting it into the
+ * LRU cache on a miss. Any previously attached NM on rk is released first.
+ * Both keys must be KEX keys of the same algorithm.
+ */
+void rspamd_keypair_cache_process(struct rspamd_keypair_cache *c,
+								  struct rspamd_cryptobox_keypair *lk,
+								  struct rspamd_cryptobox_pubkey *rk)
+{
+	struct rspamd_keypair_elt search, *new;
+
+	g_assert(lk != NULL);
+	g_assert(rk != NULL);
+	g_assert(rk->alg == lk->alg);
+	g_assert(rk->type == lk->type);
+	g_assert(rk->type == RSPAMD_KEYPAIR_KEX);
+
+	/* Cache key = remote id followed by local id */
+	memset(&search, 0, sizeof(search));
+	memcpy(search.pair, rk->id, rspamd_cryptobox_HASHBYTES);
+	memcpy(&search.pair[rspamd_cryptobox_HASHBYTES], lk->id,
+		   rspamd_cryptobox_HASHBYTES);
+	new = rspamd_lru_hash_lookup(c->hash, &search, time(NULL));
+
+	/* Drop whatever NM the pubkey currently holds */
+	if (rk->nm) {
+		REF_RELEASE(rk->nm);
+		rk->nm = NULL;
+	}
+
+	if (new == NULL) {
+		new = g_malloc0(sizeof(*new));
+
+		/* NM structure requires 32-byte alignment; failure is fatal */
+		if (posix_memalign((void **) &new->nm, 32, sizeof(*new->nm)) != 0) {
+			abort();
+		}
+
+		REF_INIT_RETAIN(new->nm, rspamd_cryptobox_nm_dtor);
+
+		memcpy(new->pair, rk->id, rspamd_cryptobox_HASHBYTES);
+		memcpy(&new->pair[rspamd_cryptobox_HASHBYTES], lk->id,
+			   rspamd_cryptobox_HASHBYTES);
+		memcpy(&new->nm->sk_id, lk->id, sizeof(guint64));
+
+		if (rk->alg == RSPAMD_CRYPTOBOX_MODE_25519) {
+			struct rspamd_cryptobox_pubkey_25519 *rk_25519 =
+				RSPAMD_CRYPTOBOX_PUBKEY_25519(rk);
+			struct rspamd_cryptobox_keypair_25519 *sk_25519 =
+				RSPAMD_CRYPTOBOX_KEYPAIR_25519(lk);
+
+			rspamd_cryptobox_nm(new->nm->nm, rk_25519->pk, sk_25519->sk, rk->alg);
+		}
+		else {
+			struct rspamd_cryptobox_pubkey_nist *rk_nist =
+				RSPAMD_CRYPTOBOX_PUBKEY_NIST(rk);
+			struct rspamd_cryptobox_keypair_nist *sk_nist =
+				RSPAMD_CRYPTOBOX_KEYPAIR_NIST(lk);
+
+			rspamd_cryptobox_nm(new->nm->nm, rk_nist->pk, sk_nist->sk, rk->alg);
+		}
+
+		/* Cache owns one reference via the destroy callback */
+		rspamd_lru_hash_insert(c->hash, new, new, time(NULL), -1);
+	}
+
+	g_assert(new != NULL);
+
+	rk->nm = new->nm;
+	REF_RETAIN(rk->nm);
+}
+
+/* Destroy the cache and all cached NM entries; NULL-safe */
+void rspamd_keypair_cache_destroy(struct rspamd_keypair_cache *c)
+{
+	if (c != NULL) {
+		rspamd_lru_hash_destroy(c->hash);
+		g_free(c);
+	}
+}
diff --git a/src/libcryptobox/keypairs_cache.h b/src/libcryptobox/keypairs_cache.h
new file mode 100644
index 0000000..96e356a
--- /dev/null
+++ b/src/libcryptobox/keypairs_cache.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KEYPAIRS_CACHE_H_
+#define KEYPAIRS_CACHE_H_
+
+#include "config.h"
+#include "keypair.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque cache mapping (remote id, local id) pairs to their precomputed
+ * shared ("beforenm") secrets; see keypairs_cache.c for the layout */
+struct rspamd_keypair_cache;
+
+/**
+ * Create new keypair cache of the specified size
+ * @param max_items defines maximum count of elements in the cache
+ * @return new cache
+ */
+struct rspamd_keypair_cache *rspamd_keypair_cache_new(guint max_items);
+
+
+/**
+ * Process local and remote keypair setting beforenm value as appropriate:
+ * looks up (or computes and caches) the shared "beforenm" secret for the
+ * lk/rk pair and attaches it to the remote pubkey, replacing any secret
+ * previously attached to it. Both keys must be KEX keys of the same
+ * algorithm.
+ * @param c cache of keypairs
+ * @param lk local key
+ * @param rk remote key
+ */
+void rspamd_keypair_cache_process(struct rspamd_keypair_cache *c,
+ struct rspamd_cryptobox_keypair *lk,
+ struct rspamd_cryptobox_pubkey *rk);
+
+/**
+ * Destroy old keypair cache
+ * @param c cache object (NULL is ignored)
+ */
+void rspamd_keypair_cache_destroy(struct rspamd_keypair_cache *c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* KEYPAIRS_CACHE_H_ */
diff --git a/src/libcryptobox/macro.S b/src/libcryptobox/macro.S
new file mode 100644
index 0000000..f213f15
--- /dev/null
+++ b/src/libcryptobox/macro.S
@@ -0,0 +1,176 @@
+#include "platform_config.h"
+
+#if !defined(HAVE_SLASHMACRO) && !defined(HAVE_DOLLARMACRO)
+ #error Unknown gnu as macro parameter convention! Run ./configure
+#endif
+
+#if defined(__MACH__)
+ /* Mach-O (Apple) flavour. Each function gets both a plain and an
+ * underscore-prefixed label (the latter matches C symbol naming on
+ * Mach-O). Depending on the assembler's macro convention, arguments
+ * are referenced as \name (HAVE_SLASHMACRO) or positionally as
+ * $0, $1, ... (HAVE_DOLLARMACRO); see platform_config.h.in. */
+ .macro FN name
+ #if defined(HAVE_SLASHMACRO)
+ \name:
+ _\name:
+ #elif defined(HAVE_DOLLARMACRO)
+ $0:
+ _$0:
+ #endif
+ .endm
+
+ /* FN_EXT: identical to FN here; args/xmmused are accepted but unused */
+ .macro FN_EXT name, args, xmmused
+ #if defined(HAVE_SLASHMACRO)
+ FN \name
+ #elif defined(HAVE_DOLLARMACRO)
+ FN $0
+ #endif
+ .endm
+
+ /* Mach-O has no .size/.type directives, so FN_END is a no-op */
+ .macro FN_END name
+ .endm
+
+ /* Hide a symbol from dynamic export via .private_extern, if supported */
+ .macro HIDDEN name
+ #if defined(HAVE_AS_PRIVATE_EXTERN)
+ #if defined(HAVE_SLASHMACRO)
+ .private_extern \name
+ .private_extern _\name
+ #elif defined(HAVE_DOLLARMACRO)
+ .private_extern $0
+ .private_extern _$0
+ #endif
+ #endif
+ .endm
+#else
+ /* ELF flavour: also emit both plain and underscored labels, and record
+ * symbol size/type so the function shows up properly in symbol tables */
+ .macro FN name
+ \name:
+ _\name:
+ .endm
+
+ .macro FN_EXT name, args, xmmused
+ FN \name
+ .endm
+
+ .macro FN_END name
+ .size \name, .-\name
+ .size _\name, .-_\name
+ .type \name, @function
+ .type _\name, @function
+ .endm
+
+ /* Restrict ELF symbol visibility when the .hidden directive exists */
+ .macro HIDDEN name
+ #if defined(HAVE_AS_HIDDEN)
+ .hidden \name
+ .hidden _\name
+ #endif
+ .endm
+
+ /* set NX for stack (marks the stack non-executable on GNU toolchains) */
+ .section .note.GNU-stack,"",@progbits
+#endif
+#if defined(__MACH__)
+ /* Mach-O sections; note SECTION_RODATA deliberately aliases the text
+ * section rather than using a real const section */
+ .macro SECTION_TEXT
+ .section __TEXT,__text,regular
+ .endm
+
+ .macro SECTION_RODATA
+ .section __TEXT,__text,regular
+ .endm
+#else
+ /* put everything in the code segment to simplify things */
+ .macro SECTION_TEXT
+ .text
+ .endm
+
+ .macro SECTION_RODATA
+ .text
+ .endm
+#endif
+
+/* declare a global function: export both the plain and the
+ * underscore-prefixed name so the symbol resolves on both naming schemes */
+.macro GLOBAL name
+#if defined(HAVE_SLASHMACRO)
+ .globl \name
+ .globl _\name
+#elif defined(HAVE_DOLLARMACRO)
+ .globl $0
+ .globl _$0
+#endif
+.endm
+
+/* Variants of FN/FN_EXT/FN_END that route the name through LOCAL_PREFIX()
+ * — presumably a name-mangling macro supplied by the including file;
+ * TODO confirm where LOCAL_PREFIX is defined */
+.macro FN_LOCAL_PREFIX name
+#if defined(HAVE_SLASHMACRO)
+ FN LOCAL_PREFIX(\name)
+#elif defined(HAVE_DOLLARMACRO)
+ FN LOCAL_PREFIX($0)
+#endif
+.endm
+
+.macro FN_EXT_LOCAL_PREFIX name, args, xmmused
+#if defined(HAVE_SLASHMACRO)
+ FN_EXT LOCAL_PREFIX(\name), \args, \xmmused
+#elif defined(HAVE_DOLLARMACRO)
+ FN_EXT LOCAL_PREFIX($0), $1, $2
+#endif
+.endm
+
+.macro FN_END_LOCAL_PREFIX name
+#if defined(HAVE_SLASHMACRO)
+ FN_END LOCAL_PREFIX(\name)
+#elif defined(HAVE_DOLLARMACRO)
+ FN_END LOCAL_PREFIX($0)
+#endif
+.endm
+
+/* Export + hide a LOCAL_PREFIX()-mangled symbol in one step */
+.macro GLOBAL_LOCAL_PREFIX name
+#if defined(HAVE_SLASHMACRO)
+ GLOBAL LOCAL_PREFIX(\name)
+ HIDDEN LOCAL_PREFIX(\name)
+#elif defined(HAVE_DOLLARMACRO)
+ GLOBAL LOCAL_PREFIX($0)
+ HIDDEN LOCAL_PREFIX($0)
+#endif
+.endm
+
+/* Declare a global-but-hidden function and emit its entry labels */
+.macro GLOBAL_HIDDEN_FN name
+#if defined(HAVE_SLASHMACRO)
+ GLOBAL \name
+ HIDDEN \name
+ FN \name
+#elif defined(HAVE_DOLLARMACRO)
+ GLOBAL $0
+ HIDDEN $0
+ FN $0
+#endif
+.endm
+
+/* Same as GLOBAL_HIDDEN_FN but forwards args/xmmused to FN_EXT */
+.macro GLOBAL_HIDDEN_FN_EXT name, args, xmmused
+#if defined(HAVE_SLASHMACRO)
+ GLOBAL \name
+ HIDDEN \name
+ FN_EXT \name, \args, \xmmused
+#elif defined(HAVE_DOLLARMACRO)
+ GLOBAL $0
+ HIDDEN $0
+ FN_EXT $0, $1, $2
+#endif
+.endm
+
+/* pic support: load the address of var into reg without relocations that
+ * would break position-independent code */
+.macro LOAD_VAR_PIC var, reg
+#if !defined(__LP64__)
+ /* 32-bit: no RIP-relative addressing; fetch the current PC with a
+ * call/pop pair, then address var relative to local label 1 */
+ #if defined(HAVE_SLASHMACRO)
+ call 1f
+ 1:
+ popl \reg
+ leal \var - 1b(\reg), \reg
+ #elif defined(HAVE_DOLLARMACRO)
+ call 1f
+ 1:
+ popl $1
+ leal $0 - 1b($1), $1
+ #endif
+#else
+ /* 64-bit: a plain RIP-relative lea suffices */
+ #if defined(HAVE_SLASHMACRO)
+ leaq \var(%rip), \reg
+ #elif defined(HAVE_DOLLARMACRO)
+ leaq $0(%rip), $1
+ #endif
+#endif
+.endm
diff --git a/src/libcryptobox/platform_config.h.in b/src/libcryptobox/platform_config.h.in
new file mode 100644
index 0000000..7b7d17d
--- /dev/null
+++ b/src/libcryptobox/platform_config.h.in
@@ -0,0 +1,16 @@
+/* Template processed by CMake's configure_file(): ${ARCH} is substituted
+ * and each #cmakedefine line becomes a real #define (or a commented-out
+ * placeholder) depending on the corresponding CMake variable */
+#ifndef PLATFORM_H_CONFIG
+#define PLATFORM_H_CONFIG
+
+/* target architecture name string and per-arch marker macro */
+#define ARCH "${ARCH}"
+#define CMAKE_ARCH_${ARCH} 1
+/* SIMD instruction-set availability detected at configure time */
+#cmakedefine HAVE_AVX2 1
+#cmakedefine HAVE_AVX 1
+#cmakedefine HAVE_SSE2 1
+#cmakedefine HAVE_SSE41 1
+#cmakedefine HAVE_SSE42 1
+#cmakedefine HAVE_SSE3 1
+#cmakedefine HAVE_SSSE3 1
+/* gas macro-argument convention used by macro.S: \arg vs $0 style */
+#cmakedefine HAVE_SLASHMACRO 1
+#cmakedefine HAVE_DOLLARMACRO 1
+
+#endif \ No newline at end of file