diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /src/libcryptobox | |
parent | Initial commit. (diff) | |
download | rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz rspamd-133a45c109da5310add55824db21af5239951f93.zip |
Adding upstream version 3.8.1.upstream/3.8.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
27 files changed, 8936 insertions, 0 deletions
diff --git a/src/libcryptobox/AsmOpt.cmake b/src/libcryptobox/AsmOpt.cmake new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/libcryptobox/AsmOpt.cmake diff --git a/src/libcryptobox/CMakeLists.txt b/src/libcryptobox/CMakeLists.txt new file mode 100644 index 0000000..d1c8e3d --- /dev/null +++ b/src/libcryptobox/CMakeLists.txt @@ -0,0 +1,41 @@ +SET(CHACHASRC ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/chacha.c + ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/ref.c) + +SET(BASE64SRC ${CMAKE_CURRENT_SOURCE_DIR}/base64/ref.c + ${CMAKE_CURRENT_SOURCE_DIR}/base64/base64.c) + +IF (HAVE_AVX2) + IF ("${ARCH}" STREQUAL "x86_64") + SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx2.S) + MESSAGE(STATUS "Cryptobox: AVX2 support is added (chacha20)") + ENDIF () + SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/avx2.c) + MESSAGE(STATUS "Cryptobox: AVX2 support is added (base64)") +ENDIF (HAVE_AVX2) +IF (HAVE_AVX) + IF ("${ARCH}" STREQUAL "x86_64") + SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S) + MESSAGE(STATUS "Cryptobox: AVX support is added (chacha20)") + ENDIF () +ENDIF (HAVE_AVX) +IF (HAVE_SSE2) + IF ("${ARCH}" STREQUAL "x86_64") + SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S) + MESSAGE(STATUS "Cryptobox: SSE2 support is added (chacha20)") + ENDIF () +ENDIF (HAVE_SSE2) +IF (HAVE_SSE42) + IF ("${ARCH}" STREQUAL "x86_64") + SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/sse42.c) + MESSAGE(STATUS "Cryptobox: SSE42 support is added (base64)") + ENDIF () +ENDIF (HAVE_SSE42) + +CONFIGURE_FILE(platform_config.h.in platform_config.h) +INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") +SET(LIBCRYPTOBOXSRC ${CMAKE_CURRENT_SOURCE_DIR}/cryptobox.c + ${CMAKE_CURRENT_SOURCE_DIR}/keypair.c + ${CMAKE_CURRENT_SOURCE_DIR}/keypairs_cache.c + ${CMAKE_CURRENT_SOURCE_DIR}/catena/catena.c) + +SET(RSPAMD_CRYPTOBOX ${LIBCRYPTOBOXSRC} ${CHACHASRC} ${BASE64SRC} PARENT_SCOPE) diff --git 
a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c new file mode 100644 index 0000000..38abffc --- /dev/null +++ b/src/libcryptobox/base64/avx2.c @@ -0,0 +1,287 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- +Copyright (c) 2013-2015, Alfred Klomp +Copyright (c) 2018, Vsevolod Stakhov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "cryptobox.h" + +extern const uint8_t base64_table_dec[256]; + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("avx2") +#endif +#ifndef __SSE2__ +#define __SSE2__ +#endif +#ifndef __SSE__ +#define __SSE__ +#endif +#ifndef __SSE4_2__ +#define __SSE4_2__ +#endif +#ifndef __SSE4_1__ +#define __SSE4_1__ +#endif +#ifndef __SSEE3__ +#define __SSEE3__ +#endif +#ifndef __AVX__ +#define __AVX__ +#endif +#ifndef __AVX2__ +#define __AVX2__ +#endif + +#include <immintrin.h> + +#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) +#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) +#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n)) +#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) -1)) + +static inline __m256i +dec_reshuffle(__m256i in) __attribute__((__target__("avx2"))); + +static inline __m256i +dec_reshuffle(__m256i in) +{ + // in, lower lane, bits, upper case are most significant bits, lower case are least significant bits: + // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ + // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG + // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD + // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA + + const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); + // 0000kkkk LLllllll 0000JJJJ JJjjKKKK + // 0000hhhh IIiiiiii 0000GGGG GGggHHHH + // 0000eeee FFffffff 0000DDDD 
DDddEEEE + // 0000bbbb CCcccccc 0000AAAA AAaaBBBB + + __m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); + // 00000000 JJJJJJjj KKKKkkkk LLllllll + // 00000000 GGGGGGgg HHHHhhhh IIiiiiii + // 00000000 DDDDDDdd EEEEeeee FFffffff + // 00000000 AAAAAAaa BBBBbbbb CCcccccc + + // Pack bytes together in each lane: + out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); + // 00000000 00000000 00000000 00000000 + // LLllllll KKKKkkkk JJJJJJjj IIiiiiii + // HHHHhhhh GGGGGGgg FFffffff EEEEeeee + // DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa + + // Pack lanes + return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); +} + + +#define INNER_LOOP_AVX2 \ + while (inlen >= 45) { \ + __m256i str = _mm256_loadu_si256((__m256i *) c); \ + const __m256i lut_lo = _mm256_setr_epi8( \ + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \ + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \ + const __m256i lut_hi = _mm256_setr_epi8( \ + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \ + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \ + const __m256i lut_roll = _mm256_setr_epi8( \ + 0, 16, 19, 4, -65, -65, -71, -71, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 16, 19, 4, -65, -65, -71, -71, \ + 0, 0, 0, 0, 0, 0, 0, 0); \ + const __m256i mask_2F = _mm256_set1_epi8(0x2f); \ + const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \ + const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \ + const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \ + const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \ + const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \ + const __m256i roll = 
_mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \ + if (!_mm256_testz_si256(lo, hi)) { \ + seen_error = true; \ + break; \ + } \ + str = _mm256_add_epi8(str, roll); \ + str = dec_reshuffle(str); \ + _mm256_storeu_si256((__m256i *) o, str); \ + c += 32; \ + o += 24; \ + outl += 24; \ + inlen -= 32; \ + } + +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) +{ + ssize_t ret = 0; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; + uint8_t q, carry; + size_t outl = 0; + size_t leftover = 0; + bool seen_error = false; + +repeat: + switch (leftover) { + for (;;) { + case 0: + if (G_LIKELY(!seen_error)) { + INNER_LOOP_AVX2 + } + + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + carry = q << 2; + leftover++; + + case 1: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + *o++ = carry | (q >> 4); + carry = q << 4; + leftover++; + outl++; + + case 2: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + leftover++; + + if (q == 254) { + if (inlen-- != 0) { + leftover = 0; + q = base64_table_dec[*c++]; + ret = ((q == 254) && (inlen == 0)) ? 1 : 0; + break; + } + else { + ret = 1; + break; + } + } + else { + leftover--; + } + /* If we get here, there was an error: */ + break; + } + *o++ = carry | (q >> 2); + carry = q << 6; + leftover++; + outl++; + + case 3: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + /* + * When q == 254, the input char is '='. Return 1 and EOF. + * When q == 255, the input char is invalid. Return 0 and EOF. 
+ */ + if (q == 254 && inlen == 0) { + ret = 1; + leftover = 0; + } + else { + ret = 0; + } + + break; + } + + *o++ = carry | q; + carry = 0; + leftover = 0; + outl++; + } + } + + if (!ret && inlen > 0) { + /* Skip to the next valid character in input */ + while (inlen > 0 && base64_table_dec[*c] >= 254) { + c++; + inlen--; + } + + if (inlen > 0) { + seen_error = false; + goto repeat; + } + } + + *outlen = outl; + + return ret; +} + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif +#endif diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c new file mode 100644 index 0000000..e868924 --- /dev/null +++ b/src/libcryptobox/base64/base64.c @@ -0,0 +1,445 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "config.h" +#include "cryptobox.h" +#include "base64.h" +#include "platform_config.h" +#include "str_util.h" +#include "util.h" +#include "contrib/libottery/ottery.h" + +extern unsigned cpu_config; +const uint8_t + base64_table_dec[256] = + { + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 62, + 255, + 255, + 255, + 63, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 255, + 255, + 255, + 254, + 255, + 255, + 255, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 255, + 255, + 255, + 255, + 255, + 255, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, +}; + +static const 
char base64_alphabet[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; + +typedef struct base64_impl { + unsigned short enabled; + unsigned short min_len; + unsigned int cpu_flags; + const char *desc; + int (*decode)(const char *in, size_t inlen, + unsigned char *out, size_t *outlen); +} base64_impl_t; + +#define BASE64_DECLARE(ext) \ + int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen); +#define BASE64_IMPL(cpuflags, min_len, desc, ext) \ + { \ + 0, (min_len), (cpuflags), desc, base64_decode_##ext \ + } + +BASE64_DECLARE(ref); +#define BASE64_REF BASE64_IMPL(0, 0, "ref", ref) + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(HAVE_SSE42) && defined(__x86_64__) +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); + +BASE64_DECLARE(sse42); +#define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42) +#endif +#endif + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(HAVE_AVX2) && defined(__x86_64__) +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); + +BASE64_DECLARE(avx2); +#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2) +#endif +#endif + +static base64_impl_t base64_list[] = { + BASE64_REF, +#ifdef BASE64_SSE42 + BASE64_SSE42, +#endif +#ifdef BASE64_AVX2 + BASE64_AVX2, +#endif +}; + +static const base64_impl_t *base64_ref = &base64_list[0]; + +const char * +base64_load(void) +{ + guint i; + const base64_impl_t *opt_impl = base64_ref; + + /* Enable reference */ + base64_list[0].enabled = true; + + if (cpu_config != 0) { + for (i = 1; i < G_N_ELEMENTS(base64_list); i++) { + if (base64_list[i].cpu_flags & cpu_config) { + base64_list[i].enabled = true; + opt_impl = &base64_list[i]; + } + } + } + + + return opt_impl->desc; +} + +gboolean +rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen, + guchar *out, gsize *outlen) +{ + const 
base64_impl_t *opt_impl = base64_ref; + + for (gint i = G_N_ELEMENTS(base64_list) - 1; i > 0; i--) { + if (base64_list[i].enabled && base64_list[i].min_len <= inlen) { + opt_impl = &base64_list[i]; + break; + } + } + + return opt_impl->decode(in, inlen, out, outlen); +} + +double +base64_test(bool generic, size_t niters, size_t len, size_t str_len) +{ + size_t cycles; + guchar *in, *out, *tmp; + gdouble t1, t2, total = 0; + gsize outlen; + + g_assert(len > 0); + in = g_malloc(len); + tmp = g_malloc(len); + ottery_rand_bytes(in, len); + + out = rspamd_encode_base64_fold(in, len, str_len, &outlen, + RSPAMD_TASK_NEWLINES_CRLF); + + if (generic) { + base64_list[0].decode(out, outlen, tmp, &len); + } + else { + rspamd_cryptobox_base64_decode(out, outlen, tmp, &len); + } + + g_assert(memcmp(in, tmp, len) == 0); + + for (cycles = 0; cycles < niters; cycles++) { + t1 = rspamd_get_ticks(TRUE); + if (generic) { + base64_list[0].decode(out, outlen, tmp, &len); + } + else { + rspamd_cryptobox_base64_decode(out, outlen, tmp, &len); + } + t2 = rspamd_get_ticks(TRUE); + total += t2 - t1; + } + + g_free(in); + g_free(tmp); + g_free(out); + + return total; +} + + +gboolean +rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen) +{ + const guchar *p, *end; + + if (inlen == 0) { + return FALSE; + } + + p = in; + end = in + inlen; + + while (p < end && *p != '=') { + if (!g_ascii_isspace(*p)) { + if (base64_table_dec[*p] == 255) { + return FALSE; + } + } + p++; + } + + return TRUE; +}
\ No newline at end of file diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h new file mode 100644 index 0000000..f53c80a --- /dev/null +++ b/src/libcryptobox/base64/base64.h @@ -0,0 +1,31 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ +#define SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +const char *base64_load(void); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ */ diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c new file mode 100644 index 0000000..61df68e --- /dev/null +++ b/src/libcryptobox/base64/ref.c @@ -0,0 +1,241 @@ +/*- +Copyright (c) 2013-2015, Alfred Klomp +Copyright (c) 2016, Vsevolod Stakhov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "libutil/util.h" + +extern const uint8_t base64_table_dec[256]; + +#define INNER_LOOP_64 \ + do { \ + uint64_t str, res, dec; \ + bool aligned = rspamd_is_aligned_as(c, str); \ + while (inlen >= 13) { \ + if (aligned) { str = *(uint64_t *) c; } \ + else { \ + memcpy(&str, c, sizeof(str)); \ + } \ + str = GUINT64_TO_BE(str); \ + if ((dec = base64_table_dec[str >> 56]) > 63) { \ + break; \ + } \ + res = dec << 58; \ + if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 52; \ + if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 46; \ + if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 40; \ + if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 34; \ + if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 28; \ + if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 22; \ + if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 16; \ + res = GUINT64_FROM_BE(res); \ + memcpy(o, &res, sizeof(res)); \ + c += 8; \ + o 
+= 6; \ + outl += 6; \ + inlen -= 8; \ + } \ + } while (0) + +#define INNER_LOOP_32 \ + do { \ + uint32_t str, res, dec; \ + bool aligned = rspamd_is_aligned_as(c, str); \ + while (inlen >= 8) { \ + if (aligned) { str = *(uint32_t *) c; } \ + else { \ + memcpy(&str, c, sizeof(str)); \ + } \ + str = GUINT32_TO_BE(str); \ + if ((dec = base64_table_dec[str >> 24]) > 63) { \ + break; \ + } \ + res = dec << 26; \ + if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 20; \ + if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 14; \ + if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 8; \ + res = GUINT32_FROM_BE(res); \ + memcpy(o, &res, sizeof(res)); \ + c += 4; \ + o += 3; \ + outl += 3; \ + inlen -= 4; \ + } \ + } while (0) + + +int base64_decode_ref(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) +{ + ssize_t ret = 0; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; + uint8_t q, carry; + size_t outl = 0; + size_t leftover = 0; + +repeat: + switch (leftover) { + for (;;) { + case 0: +#if defined(__LP64__) + INNER_LOOP_64; +#else + INNER_LOOP_32; +#endif + + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + carry = (uint8_t) (q << 2); + leftover++; + + case 1: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + *o++ = carry | (q >> 4); + carry = (uint8_t) (q << 4); + leftover++; + outl++; + + case 2: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + leftover++; + + if (q == 254) { + if (inlen-- != 0) { + leftover = 0; + q = base64_table_dec[*c++]; + ret = ((q == 254) && (inlen == 0)) ? 
1 : 0; + break; + } + else { + ret = 1; + break; + } + } + else { + leftover--; + } + /* If we get here, there was an error: */ + break; + } + *o++ = carry | (q >> 2); + carry = (uint8_t) (q << 6); + leftover++; + outl++; + + case 3: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + /* + * When q == 254, the input char is '='. Return 1 and EOF. + * When q == 255, the input char is invalid. Return 0 and EOF. + */ + if (q == 254 && inlen == 0) { + ret = 1; + leftover = 0; + } + else { + ret = 0; + } + + break; + } + + *o++ = carry | q; + carry = 0; + leftover = 0; + outl++; + } + } + + if (!ret && inlen > 0) { + /* Skip to the next valid character in input */ + while (inlen > 0 && base64_table_dec[*c] >= 254) { + c++; + inlen--; + } + + if (inlen > 0) { + goto repeat; + } + } + + *outlen = outl; + + return ret; +} diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c new file mode 100644 index 0000000..36070ab --- /dev/null +++ b/src/libcryptobox/base64/sse42.c @@ -0,0 +1,268 @@ +/*- + * Copyright 2017 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- +Copyright (c) 2013-2015, Alfred Klomp +Copyright (c) 2016, Vsevolod Stakhov +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "cryptobox.h" + +extern const uint8_t base64_table_dec[256]; + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("sse4.2") +#endif +#ifndef __SSE2__ +#define __SSE2__ +#endif +#ifndef __SSE__ +#define __SSE__ +#endif +#ifndef __SSE4_2__ +#define __SSE4_2__ +#endif +#ifndef __SSE4_1__ +#define __SSE4_1__ +#endif +#ifndef __SSEE3__ +#define __SSEE3__ +#endif +#include <xmmintrin.h> +#include <nmmintrin.h> + + +static inline __m128i +dec_reshuffle(__m128i in) __attribute__((__target__("sse4.2"))); + +static inline __m128i dec_reshuffle(__m128i in) +{ + // Mask in a single byte per shift: + const __m128i maskB2 = _mm_set1_epi32(0x003F0000); + const __m128i maskB1 = _mm_set1_epi32(0x00003F00); + + // Pack bytes together: + __m128i out = _mm_srli_epi32(in, 16); + + out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2)); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12)); + + out = _mm_or_si128(out, _mm_slli_epi32(in, 26)); + + // Reshuffle and repack into 12-byte output format: + return _mm_shuffle_epi8(out, _mm_setr_epi8( + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); +} + +#define CMPGT(s, n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) + +#define INNER_LOOP_SSE42 \ + while (inlen >= 24) { \ + __m128i str = _mm_loadu_si128((__m128i *) c); \ + const __m128i lut = _mm_setr_epi8( \ + 19, 16, 4, 4, \ + 4, 4, 4, 4, \ + 4, 4, 4, 4, \ + 0, 0, -71, -65); \ + const __m128i range = _mm_setr_epi8( \ + '+', '+', \ + '+', '+', \ + '+', '+', \ + '+', '+', \ + '/', '/', \ + '0', '9', \ + 'A', 'Z', \ + 'a', 'z'); \ + if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \ + seen_error = true; \ + break; \ + } \ + __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \ + __m128i mask45 = CMPGT(str, 64); \ + __m128i mask5 = CMPGT(str, 96); \ + indices = _mm_andnot_si128(mask45, 
indices); \ + mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \ + indices = _mm_add_epi8(indices, mask45); \ + indices = _mm_add_epi8(indices, mask5); \ + __m128i delta = _mm_shuffle_epi8(lut, indices); \ + str = _mm_add_epi8(str, delta); \ + str = dec_reshuffle(str); \ + _mm_storeu_si128((__m128i *) o, str); \ + c += 16; \ + o += 12; \ + outl += 12; \ + inlen -= 16; \ + } + +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) +{ + ssize_t ret = 0; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; + uint8_t q, carry; + size_t outl = 0; + size_t leftover = 0; + bool seen_error = false; + +repeat: + switch (leftover) { + for (;;) { + case 0: + if (G_LIKELY(!seen_error)) { + INNER_LOOP_SSE42 + } + + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + carry = q << 2; + leftover++; + + case 1: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + *o++ = carry | (q >> 4); + carry = q << 4; + leftover++; + outl++; + + case 2: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + leftover++; + + if (q == 254) { + if (inlen-- != 0) { + leftover = 0; + q = base64_table_dec[*c++]; + ret = ((q == 254) && (inlen == 0)) ? 1 : 0; + break; + } + else { + ret = 1; + break; + } + } + else { + leftover--; + } + /* If we get here, there was an error: */ + break; + } + *o++ = carry | (q >> 2); + carry = q << 6; + leftover++; + outl++; + + case 3: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + /* + * When q == 254, the input char is '='. Return 1 and EOF. + * When q == 255, the input char is invalid. Return 0 and EOF. 
+ */ + if (q == 254 && inlen == 0) { + ret = 1; + leftover = 0; + } + else { + ret = 0; + } + + break; + } + + *o++ = carry | q; + carry = 0; + leftover = 0; + outl++; + } + } + + if (!ret && inlen > 0) { + /* Skip to the next valid character in input */ + while (inlen > 0 && base64_table_dec[*c] >= 254) { + c++; + inlen--; + } + + if (inlen > 0) { + seen_error = false; + goto repeat; + } + } + + *outlen = outl; + + return ret; +} + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif +#endif diff --git a/src/libcryptobox/catena/LICENSE b/src/libcryptobox/catena/LICENSE new file mode 100644 index 0000000..ff22dc8 --- /dev/null +++ b/src/libcryptobox/catena/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2014 cforler + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/libcryptobox/catena/README.md b/src/libcryptobox/catena/README.md new file mode 100644 index 0000000..4a0f948 --- /dev/null +++ b/src/libcryptobox/catena/README.md @@ -0,0 +1,18 @@ +Catena +====== +Catena is a memory-consuming password scrambler that excellently +thwarts massively parallel attacks on cheap memory-constrained +hardware, such as recent graphical processing units (GPUs). +Furthermore, Catena provides resistance against cache-timing attacks, since +its memory-access pattern is password-independent. + +Academic paper: +<a href="http://www.uni-weimar.de/fileadmin/user/fak/medien/professuren/Mediensicherheit/Research/Publications/catena-v3.1.pdf">catena-v3.1.pdf</a> + +Rspamd specific +--------------- + +Rspamd implements Catena-Butterfly using full blake2b hash implemented in the +cryptobox. + +Original code: https://github.com/medsec/catena
\ No newline at end of file diff --git a/src/libcryptobox/catena/catena.c b/src/libcryptobox/catena/catena.c new file mode 100644 index 0000000..7e066dd --- /dev/null +++ b/src/libcryptobox/catena/catena.c @@ -0,0 +1,444 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * Copyright (c) 2014 cforler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "config.h" +#include "catena.h" + +#include <sodium.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define TO_LITTLE_ENDIAN_64(n) (n) +#define TO_LITTLE_ENDIAN_32(n) (n) +#else +#define TO_LITTLE_ENDIAN_64 GUINT64_SWAP_LE_BE +#define TO_LITTLE_ENDIAN_32 GUINT32_SWAP_LE_BE +#endif + +/* Recommended default values */ +#define H_LEN CATENA_HLEN +#define KEY_LEN 16 + +const uint8_t VERSION_ID[] = "Butterfly-Full"; +const uint8_t LAMBDA = 4; +const uint8_t GARLIC = 16; +const uint8_t MIN_GARLIC = 16; + +/* + * Hash part + */ + +static inline void +__Hash1(const uint8_t *input, const uint32_t inputlen, + uint8_t hash[H_LEN]) +{ + crypto_generichash_blake2b_state ctx; + crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN); + crypto_generichash_blake2b_update(&ctx, input, inputlen); + crypto_generichash_blake2b_final(&ctx, hash, H_LEN); +} + +/***************************************************/ + +static inline void __Hash2(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2, + const uint8_t i2len, uint8_t hash[H_LEN]) +{ + crypto_generichash_blake2b_state ctx; + + crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN); + crypto_generichash_blake2b_update(&ctx, i1, i1len); + crypto_generichash_blake2b_update(&ctx, i2, i2len); + crypto_generichash_blake2b_final(&ctx, hash, H_LEN); +} + +/***************************************************/ + +static inline void __Hash3(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2, + const uint8_t i2len, const uint8_t *i3, const uint8_t i3len, + uint8_t hash[H_LEN]) +{ + crypto_generichash_blake2b_state ctx; + + crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN); + crypto_generichash_blake2b_update(&ctx, i1, i1len); + crypto_generichash_blake2b_update(&ctx, i2, i2len); + crypto_generichash_blake2b_update(&ctx, i3, i3len); + crypto_generichash_blake2b_final(&ctx, hash, H_LEN); +} + +/***************************************************/ + +static inline void __Hash4(const uint8_t *i1, const uint8_t i1len, 
const uint8_t *i2, + const uint8_t i2len, const uint8_t *i3, const uint8_t i3len, + const uint8_t *i4, const uint8_t i4len, uint8_t hash[H_LEN]) +{ + crypto_generichash_blake2b_state ctx; + + crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN); + crypto_generichash_blake2b_update(&ctx, i1, i1len); + crypto_generichash_blake2b_update(&ctx, i2, i2len); + crypto_generichash_blake2b_update(&ctx, i3, i3len); + crypto_generichash_blake2b_update(&ctx, i4, i4len); + crypto_generichash_blake2b_final(&ctx, hash, H_LEN); +} + +/***************************************************/ + +static inline void __Hash5(const uint8_t *i1, const uint8_t i1len, const uint8_t *i2, + const uint8_t i2len, const uint8_t *i3, const uint8_t i3len, + const uint8_t *i4, const uint8_t i4len, const uint8_t *i5, + const uint8_t i5len, uint8_t hash[H_LEN]) +{ + crypto_generichash_blake2b_state ctx; + + crypto_generichash_blake2b_init(&ctx, NULL, 0, H_LEN); + crypto_generichash_blake2b_update(&ctx, i1, i1len); + crypto_generichash_blake2b_update(&ctx, i2, i2len); + crypto_generichash_blake2b_update(&ctx, i3, i3len); + crypto_generichash_blake2b_update(&ctx, i4, i4len); + crypto_generichash_blake2b_update(&ctx, i5, i5len); + crypto_generichash_blake2b_final(&ctx, hash, H_LEN); +} + +static inline void +__HashFast(int vindex, const uint8_t *i1, const uint8_t *i2, + uint8_t hash[H_LEN]) +{ + __Hash2(i1, H_LEN, i2, H_LEN, hash); +} + +static void __ResetState(void) +{ +} + +/* + * Misc utils + */ +const uint8_t ZERO8[H_LEN] = {0}; + +/* see: http://en.wikipedia.org/wiki/Xorshift#Variations */ +static int p; +static uint64_t s[16]; + +static void +initXSState(const uint8_t *a, const uint8_t *b) +{ + p = 0; + + for (int i = 0; i < 8; i++) { + s[i] = UINT64_C(0); + s[i + 8] = UINT64_C(0); + + for (int j = 0; j < 8; j++) { + s[i] |= ((uint64_t) a[i * 8 + j]) << j * 8; + s[i + 8] |= ((uint64_t) b[i * 8 + j]) << j * 8; + } + } +} + +static uint64_t +xorshift1024star(void) +{ + uint64_t s0 = s[p]; + uint64_t 
s1 = s[p = (p + 1) & 15]; + s1 ^= s1 << 31; + s1 ^= s1 >> 11; + s0 ^= s0 >> 30; + return (s[p] = s0 ^ s1) * UINT64_C(1181783497276652981); +} + +static void +H_INIT(const uint8_t *x, const uint16_t xlen, uint8_t *vm1, uint8_t *vm2) +{ + const uint8_t l = 2; + uint8_t *tmp = (uint8_t *) g_malloc(l * H_LEN); + + for (uint8_t i = 0; i != l; ++i) { + __Hash2(&i, 1, x, xlen, tmp + i * H_LEN); + } + + memcpy(vm1, tmp, H_LEN); + memcpy(vm2, tmp + (l / 2 * H_LEN), H_LEN); + g_free(tmp); +} + +static void +H_First(const uint8_t *i1, const uint8_t *i2, uint8_t *hash) +{ + uint8_t i = 0; + uint8_t *x = (uint8_t *) g_malloc(H_LEN); + + __ResetState(); + __Hash2(i1, H_LEN, i2, H_LEN, x); + __Hash2(&i, 1, x, H_LEN, hash); + g_free(x); +} + +static inline void +initmem(const uint8_t x[H_LEN], const uint64_t c, uint8_t *r) +{ + uint8_t *vm2 = (uint8_t *) g_malloc(H_LEN); + uint8_t *vm1 = (uint8_t *) g_malloc(H_LEN); + + H_INIT(x, H_LEN, vm1, vm2); + __ResetState(); + __HashFast(0, vm1, vm2, r); + __HashFast(1, r, vm1, r + H_LEN); + + /* Top row */ + for (uint64_t i = 2; i < c; i++) { + __HashFast(i, r + (i - 1) * H_LEN, r + (i - 2) * H_LEN, r + i * H_LEN); + } + + g_free(vm2); + g_free(vm1); +} + +static inline void +catena_gamma(const uint8_t garlic, const uint8_t *salt, + const uint8_t saltlen, uint8_t *r) +{ + const uint64_t q = UINT64_C(1) << ((3 * garlic + 3) / 4); + + uint64_t i, j, j2; + uint8_t *tmp = g_malloc(H_LEN); + uint8_t *tmp2 = g_malloc(H_LEN); + + __Hash1(salt, saltlen, tmp); + __Hash1(tmp, H_LEN, tmp2); + initXSState(tmp, tmp2); + + __ResetState(); + for (i = 0; i < q; i++) { + j = xorshift1024star() >> (64 - garlic); + j2 = xorshift1024star() >> (64 - garlic); + __HashFast(i, r + j * H_LEN, r + j2 * H_LEN, r + j * H_LEN); + } + + g_free(tmp); + g_free(tmp2); +} + +static void +XOR(const uint8_t *input1, const uint8_t *input2, uint8_t *output) +{ + uint32_t i; + + for (i = 0; i < H_LEN; i++) { + output[i] = input1[i] ^ input2[i]; + } +} + +/* + * Butterfly part + 
*/ +/* + * Sigma function that defines the diagonal connections of a DBG + * diagonal front: flip the (g-i)th bit (Inverse Butterfly Graph) + * diagonal back: flip the i-(g-1)th bit (Regular Butterfly Graph) + */ +static uint64_t +sigma(const uint8_t g, const uint64_t i, const uint64_t j) +{ + if (i < g) { + return (j ^ (UINT64_C(1) << (g - 1 - i))); /* diagonal front */ + } + else { + return (j ^ (UINT64_C(1) << (i - (g - 1)))); /* diagonal back */ + } +} + +/*calculate actual index from level and element index*/ +static uint64_t +idx(uint64_t i, uint64_t j, uint8_t co, uint64_t c, uint64_t m) +{ + i += co; + if (i % 3 == 0) { + return j; + } + else if (i % 3 == 1) { + if (j < m) { + /* still fits in the array */ + return j + c; + } + else { + /* start overwriting elements at the beginning */ + return j - m; + } + } + /* i % 3 == 2 */ + return j + m; +} + +/* + * Computes the hash of x using a Double Butterfly Graph, + * that forms a (2^g,\lambda)-Superconcentrator + */ +static void +Flap(const uint8_t x[H_LEN], const uint8_t lambda, const uint8_t garlic, + const uint8_t *salt, const uint8_t saltlen, uint8_t h[H_LEN]) +{ + const uint64_t c = UINT64_C(1) << garlic; + const uint64_t m = UINT64_C(1) << (garlic - 1); /* 0.5 * 2^g */ + const uint32_t l = 2 * garlic; + + uint8_t *r = g_malloc((c + m) * H_LEN); + uint8_t *tmp = g_malloc(H_LEN); + uint64_t i, j; + uint8_t k; + uint8_t co = 0; /* carry over from last iteration */ + + /* Top row */ + initmem(x, c, r); + + /*Gamma Function*/ + catena_gamma(garlic, salt, saltlen, r); + + /* DBH */ + for (k = 0; k < lambda; k++) { + for (i = 1; i < l; i++) { + XOR(r + idx(i - 1, c - 1, co, c, m) * H_LEN, + r + idx(i - 1, 0, co, c, m) * H_LEN, tmp); + + /* + * r0 := H(tmp || vsigma(g,i-1,0) ) + * __Hash2(tmp, H_LEN, r+idx(i-1,sigma(garlic,i-1,0),co,c,m) * H_LEN, H_LEN, + * r+idx(i,0,co,c,m) *H_LEN); + */ + H_First(tmp, + r + idx(i - 1, sigma(garlic, i - 1, 0), co, c, m) * H_LEN, + r + idx(i, 0, co, c, m) * H_LEN); + 
__ResetState(); + + /* vertices */ + for (j = 1; j < c; j++) { + /* tmp:= rj-1 XOR vj */ + XOR(r + idx(i, j - 1, co, c, m) * H_LEN, + r + idx(i - 1, j, co, c, m) * H_LEN, tmp); + /* rj := H(tmp || vsigma(g,i-1,j)) */ + __HashFast(j, tmp, + r + idx(i - 1, sigma(garlic, i - 1, j), co, c, m) * H_LEN, + r + idx(i, j, co, c, m) * H_LEN); + } + } + co = (co + (i - 1)) % 3; + } + + memcpy(h, r + idx(0, c - 1, co, c, m) * H_LEN, H_LEN); + g_free(r); + g_free(tmp); +} + +static int +__Catena(const uint8_t *pwd, const uint32_t pwdlen, + const uint8_t *salt, const uint8_t saltlen, const uint8_t *data, + const uint32_t datalen, const uint8_t lambda, const uint8_t min_garlic, + const uint8_t garlic, const uint8_t hashlen, const uint8_t client, + const uint8_t tweak_id, uint8_t *hash) +{ + uint8_t x[H_LEN]; + uint8_t hv[H_LEN]; + uint8_t t[4]; + uint8_t c; + + if ((hashlen > H_LEN) || (garlic > 63) || (min_garlic > garlic) || (lambda == 0) || (min_garlic == 0)) { + return -1; + } + + /*Compute H(V)*/ + __Hash1(VERSION_ID, strlen((char *) VERSION_ID), hv); + + /* Compute Tweak */ + t[0] = tweak_id; + t[1] = lambda; + t[2] = hashlen; + t[3] = saltlen; + + /* Compute H(AD) */ + __Hash1((uint8_t *) data, datalen, x); + + /* Compute the initial value to hash */ + __Hash5(hv, H_LEN, t, 4, x, H_LEN, pwd, pwdlen, salt, saltlen, x); + + /*Overwrite Password if enabled*/ +#ifdef OVERWRITE + erasepwd(pwd, pwdlen); +#endif + + Flap(x, lambda, (min_garlic + 1) / 2, salt, saltlen, x); + + for (c = min_garlic; c <= garlic; c++) { + Flap(x, lambda, c, salt, saltlen, x); + if ((c == garlic) && (client == CLIENT)) { + memcpy(hash, x, H_LEN); + return 0; + } + __Hash2(&c, 1, x, H_LEN, x); + memset(x + hashlen, 0, H_LEN - hashlen); + } + + memcpy(hash, x, hashlen); + + return 0; +} + +/***************************************************/ + +int catena(const uint8_t *pwd, const uint32_t pwdlen, const uint8_t *salt, + const uint8_t saltlen, const uint8_t *data, const uint32_t datalen, + const uint8_t 
lambda, const uint8_t min_garlic, const uint8_t garlic, + const uint8_t hashlen, uint8_t *hash) +{ + return __Catena(pwd, pwdlen, salt, saltlen, data, datalen, lambda, + min_garlic, garlic, hashlen, REGULAR, PASSWORD_HASHING_MODE, hash); +} + +int simple_catena(const uint8_t *pwd, const uint32_t pwdlen, + const uint8_t *salt, const uint8_t saltlen, + const uint8_t *data, const uint32_t datalen, + uint8_t hash[H_LEN]) +{ + return __Catena(pwd, pwdlen, salt, saltlen, data, datalen, + LAMBDA, MIN_GARLIC, GARLIC, H_LEN, + REGULAR, PASSWORD_HASHING_MODE, hash); +} + +int catena_test(void) +{ + /* From catena-v3.1 spec */ + guint8 pw[] = {0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64}; + guint8 salt[] = {0x73, 0x61, 0x6c, 0x74}; + guint8 ad[] = {0x64, 0x61, 0x74, 0x61}; + guint8 expected[] = { + 0x20, 0xc5, 0x91, 0x93, 0x8f, 0xc3, 0xaf, 0xcc, 0x3b, 0xba, 0x91, 0xd2, 0xfb, + 0x84, 0xbf, 0x7b, 0x44, 0x04, 0xf9, 0x4c, 0x45, 0xed, 0x4d, 0x11, 0xa7, 0xe2, + 0xb4, 0x12, 0x3e, 0xab, 0x0b, 0x77, 0x4a, 0x12, 0xb4, 0x22, 0xd0, 0xda, 0xb5, + 0x25, 0x29, 0x02, 0xfc, 0x54, 0x47, 0xea, 0x82, 0x63, 0x8c, 0x1a, 0xfb, 0xa7, + 0xa9, 0x94, 0x24, 0x13, 0x0e, 0x44, 0x36, 0x3b, 0x9d, 0x9f, 0xc9, 0x60}; + guint8 real[H_LEN]; + + if (catena(pw, sizeof(pw), salt, sizeof(salt), ad, sizeof(ad), + 4, 10, 10, H_LEN, real) != 0) { + return -1; + } + + return memcmp(real, expected, H_LEN); +} diff --git a/src/libcryptobox/catena/catena.h b/src/libcryptobox/catena/catena.h new file mode 100644 index 0000000..1fcea21 --- /dev/null +++ b/src/libcryptobox/catena/catena.h @@ -0,0 +1,62 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBCRYPTOBOX_CATENA_CATENA_H_ +#define SRC_LIBCRYPTOBOX_CATENA_CATENA_H_ + +/* Modes */ +#define PASSWORD_HASHING_MODE 0 +#define KEY_DERIVATION_MODE 1 +#define REGULAR 0 +#define CLIENT 1 + +#define CATENA_HLEN 64 + +#ifdef __cplusplus +extern "C" { +#endif + +int catena(const uint8_t *pwd, const uint32_t pwdlen, + const uint8_t *salt, const uint8_t saltlen, + const uint8_t *data, const uint32_t datalen, + const uint8_t lambda, const uint8_t min_garlic, + const uint8_t garlic, const uint8_t hashlen, uint8_t *hash); + +/** + * Simple interface for catena PBKDF + * @param pwd password + * @param pwdlen length of password + * @param salt salt + * @param saltlen length of salt + * @param data additional data + * @param datalen length of additional data + * @param hash output hash + * @return 0 if hash is generated, -1 in case of error + */ +int simple_catena(const uint8_t *pwd, const uint32_t pwdlen, + const uint8_t *salt, const uint8_t saltlen, + const uint8_t *data, const uint32_t datalen, + uint8_t hash[CATENA_HLEN]); + +/** + * Run a quick test on catena implementation + */ +int catena_test(void); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBCRYPTOBOX_CATENA_CATENA_H_ */ diff --git a/src/libcryptobox/chacha20/avx.S b/src/libcryptobox/chacha20/avx.S new file mode 100644 index 0000000..7689b84 --- /dev/null +++ b/src/libcryptobox/chacha20/avx.S @@ -0,0 +1,614 @@ +#include "../macro.S" +#include "constants.S" +SECTION_TEXT + +GLOBAL_HIDDEN_FN chacha_blocks_avx +chacha_blocks_avx_local: +pushq %rbx +pushq %rbp +movq %rsp, 
%rbp +andq $~63, %rsp +subq $512, %rsp +LOAD_VAR_PIC chacha_constants, %rax +vmovdqa 0(%rax), %xmm8 +vmovdqa 16(%rax), %xmm6 +vmovdqa 32(%rax), %xmm7 +vmovdqu 0(%rdi), %xmm9 +vmovdqu 16(%rdi), %xmm10 +vmovdqu 32(%rdi), %xmm11 +movq 48(%rdi), %rax +movq $1, %r9 +vmovdqa %xmm8, 0(%rsp) +vmovdqa %xmm9, 16(%rsp) +vmovdqa %xmm10, 32(%rsp) +vmovdqa %xmm11, 48(%rsp) +vmovdqa %xmm6, 80(%rsp) +vmovdqa %xmm7, 96(%rsp) +movq %rax, 64(%rsp) +cmpq $256, %rcx +jb chacha_blocks_avx_below256 +vpshufd $0x00, %xmm8, %xmm0 +vpshufd $0x55, %xmm8, %xmm1 +vpshufd $0xaa, %xmm8, %xmm2 +vpshufd $0xff, %xmm8, %xmm3 +vmovdqa %xmm0, 128(%rsp) +vmovdqa %xmm1, 144(%rsp) +vmovdqa %xmm2, 160(%rsp) +vmovdqa %xmm3, 176(%rsp) +vpshufd $0x00, %xmm9, %xmm0 +vpshufd $0x55, %xmm9, %xmm1 +vpshufd $0xaa, %xmm9, %xmm2 +vpshufd $0xff, %xmm9, %xmm3 +vmovdqa %xmm0, 192(%rsp) +vmovdqa %xmm1, 208(%rsp) +vmovdqa %xmm2, 224(%rsp) +vmovdqa %xmm3, 240(%rsp) +vpshufd $0x00, %xmm10, %xmm0 +vpshufd $0x55, %xmm10, %xmm1 +vpshufd $0xaa, %xmm10, %xmm2 +vpshufd $0xff, %xmm10, %xmm3 +vmovdqa %xmm0, 256(%rsp) +vmovdqa %xmm1, 272(%rsp) +vmovdqa %xmm2, 288(%rsp) +vmovdqa %xmm3, 304(%rsp) +vpshufd $0xaa, %xmm11, %xmm0 +vpshufd $0xff, %xmm11, %xmm1 +vmovdqa %xmm0, 352(%rsp) +vmovdqa %xmm1, 368(%rsp) +jmp chacha_blocks_avx_atleast256 +.p2align 6,,63 +nop +nop +nop +nop +nop +chacha_blocks_avx_atleast256: +movq 48(%rsp), %rax +leaq 1(%rax), %r8 +leaq 2(%rax), %r9 +leaq 3(%rax), %r10 +leaq 4(%rax), %rbx +movl %eax, 320(%rsp) +movl %r8d, 4+320(%rsp) +movl %r9d, 8+320(%rsp) +movl %r10d, 12+320(%rsp) +shrq $32, %rax +shrq $32, %r8 +shrq $32, %r9 +shrq $32, %r10 +movl %eax, 336(%rsp) +movl %r8d, 4+336(%rsp) +movl %r9d, 8+336(%rsp) +movl %r10d, 12+336(%rsp) +movq %rbx, 48(%rsp) +movq 64(%rsp), %rax +vmovdqa 128(%rsp), %xmm0 +vmovdqa 144(%rsp), %xmm1 +vmovdqa 160(%rsp), %xmm2 +vmovdqa 176(%rsp), %xmm3 +vmovdqa 192(%rsp), %xmm4 +vmovdqa 208(%rsp), %xmm5 +vmovdqa 224(%rsp), %xmm6 +vmovdqa 240(%rsp), %xmm7 +vmovdqa 256(%rsp), %xmm8 
+vmovdqa 272(%rsp), %xmm9 +vmovdqa 288(%rsp), %xmm10 +vmovdqa 304(%rsp), %xmm11 +vmovdqa 320(%rsp), %xmm12 +vmovdqa 336(%rsp), %xmm13 +vmovdqa 352(%rsp), %xmm14 +vmovdqa 368(%rsp), %xmm15 +chacha_blocks_avx_mainloop1: +vpaddd %xmm0, %xmm4, %xmm0 +vpaddd %xmm1, %xmm5, %xmm1 +vpxor %xmm12, %xmm0, %xmm12 +vpxor %xmm13, %xmm1, %xmm13 +vpaddd %xmm2, %xmm6, %xmm2 +vpaddd %xmm3, %xmm7, %xmm3 +vpxor %xmm14, %xmm2, %xmm14 +vpxor %xmm15, %xmm3, %xmm15 +vpshufb 80(%rsp), %xmm12, %xmm12 +vpshufb 80(%rsp), %xmm13, %xmm13 +vpaddd %xmm8, %xmm12, %xmm8 +vpaddd %xmm9, %xmm13, %xmm9 +vpshufb 80(%rsp), %xmm14, %xmm14 +vpshufb 80(%rsp), %xmm15, %xmm15 +vpaddd %xmm10, %xmm14, %xmm10 +vpaddd %xmm11, %xmm15, %xmm11 +vmovdqa %xmm12, 112(%rsp) +vpxor %xmm4, %xmm8, %xmm4 +vpxor %xmm5, %xmm9, %xmm5 +vpslld $ 12, %xmm4, %xmm12 +vpsrld $20, %xmm4, %xmm4 +vpxor %xmm4, %xmm12, %xmm4 +vpslld $ 12, %xmm5, %xmm12 +vpsrld $20, %xmm5, %xmm5 +vpxor %xmm5, %xmm12, %xmm5 +vpxor %xmm6, %xmm10, %xmm6 +vpxor %xmm7, %xmm11, %xmm7 +vpslld $ 12, %xmm6, %xmm12 +vpsrld $20, %xmm6, %xmm6 +vpxor %xmm6, %xmm12, %xmm6 +vpslld $ 12, %xmm7, %xmm12 +vpsrld $20, %xmm7, %xmm7 +vpxor %xmm7, %xmm12, %xmm7 +vpaddd %xmm0, %xmm4, %xmm0 +vpaddd %xmm1, %xmm5, %xmm1 +vpxor 112(%rsp), %xmm0, %xmm12 +vpxor %xmm13, %xmm1, %xmm13 +vpaddd %xmm2, %xmm6, %xmm2 +vpaddd %xmm3, %xmm7, %xmm3 +vpxor %xmm14, %xmm2, %xmm14 +vpxor %xmm15, %xmm3, %xmm15 +vpshufb 96(%rsp), %xmm12, %xmm12 +vpshufb 96(%rsp), %xmm13, %xmm13 +vpaddd %xmm8, %xmm12, %xmm8 +vpaddd %xmm9, %xmm13, %xmm9 +vpshufb 96(%rsp), %xmm14, %xmm14 +vpshufb 96(%rsp), %xmm15, %xmm15 +vpaddd %xmm10, %xmm14, %xmm10 +vpaddd %xmm11, %xmm15, %xmm11 +vmovdqa %xmm12, 112(%rsp) +vpxor %xmm4, %xmm8, %xmm4 +vpxor %xmm5, %xmm9, %xmm5 +vpslld $ 7, %xmm4, %xmm12 +vpsrld $25, %xmm4, %xmm4 +vpxor %xmm4, %xmm12, %xmm4 +vpslld $ 7, %xmm5, %xmm12 +vpsrld $25, %xmm5, %xmm5 +vpxor %xmm5, %xmm12, %xmm5 +vpxor %xmm6, %xmm10, %xmm6 +vpxor %xmm7, %xmm11, %xmm7 +vpslld $ 7, %xmm6, %xmm12 +vpsrld $25, %xmm6, 
%xmm6 +vpxor %xmm6, %xmm12, %xmm6 +vpslld $ 7, %xmm7, %xmm12 +vpsrld $25, %xmm7, %xmm7 +vpxor %xmm7, %xmm12, %xmm7 +vpaddd %xmm0, %xmm5, %xmm0 +vpaddd %xmm1, %xmm6, %xmm1 +vpxor %xmm15, %xmm0, %xmm15 +vpxor 112(%rsp), %xmm1, %xmm12 +vpaddd %xmm2, %xmm7, %xmm2 +vpaddd %xmm3, %xmm4, %xmm3 +vpxor %xmm13, %xmm2, %xmm13 +vpxor %xmm14, %xmm3, %xmm14 +vpshufb 80(%rsp), %xmm15, %xmm15 +vpshufb 80(%rsp), %xmm12, %xmm12 +vpaddd %xmm10, %xmm15, %xmm10 +vpaddd %xmm11, %xmm12, %xmm11 +vpshufb 80(%rsp), %xmm13, %xmm13 +vpshufb 80(%rsp), %xmm14, %xmm14 +vpaddd %xmm8, %xmm13, %xmm8 +vpaddd %xmm9, %xmm14, %xmm9 +vmovdqa %xmm15, 112(%rsp) +vpxor %xmm5, %xmm10, %xmm5 +vpxor %xmm6, %xmm11, %xmm6 +vpslld $ 12, %xmm5, %xmm15 +vpsrld $20, %xmm5, %xmm5 +vpxor %xmm5, %xmm15, %xmm5 +vpslld $ 12, %xmm6, %xmm15 +vpsrld $20, %xmm6, %xmm6 +vpxor %xmm6, %xmm15, %xmm6 +vpxor %xmm7, %xmm8, %xmm7 +vpxor %xmm4, %xmm9, %xmm4 +vpslld $ 12, %xmm7, %xmm15 +vpsrld $20, %xmm7, %xmm7 +vpxor %xmm7, %xmm15, %xmm7 +vpslld $ 12, %xmm4, %xmm15 +vpsrld $20, %xmm4, %xmm4 +vpxor %xmm4, %xmm15, %xmm4 +vpaddd %xmm0, %xmm5, %xmm0 +vpaddd %xmm1, %xmm6, %xmm1 +vpxor 112(%rsp), %xmm0, %xmm15 +vpxor %xmm12, %xmm1, %xmm12 +vpaddd %xmm2, %xmm7, %xmm2 +vpaddd %xmm3, %xmm4, %xmm3 +vpxor %xmm13, %xmm2, %xmm13 +vpxor %xmm14, %xmm3, %xmm14 +vpshufb 96(%rsp), %xmm15, %xmm15 +vpshufb 96(%rsp), %xmm12, %xmm12 +vpaddd %xmm10, %xmm15, %xmm10 +vpaddd %xmm11, %xmm12, %xmm11 +vpshufb 96(%rsp), %xmm13, %xmm13 +vpshufb 96(%rsp), %xmm14, %xmm14 +vpaddd %xmm8, %xmm13, %xmm8 +vpaddd %xmm9, %xmm14, %xmm9 +vmovdqa %xmm15, 112(%rsp) +vpxor %xmm5, %xmm10, %xmm5 +vpxor %xmm6, %xmm11, %xmm6 +vpslld $ 7, %xmm5, %xmm15 +vpsrld $25, %xmm5, %xmm5 +vpxor %xmm5, %xmm15, %xmm5 +vpslld $ 7, %xmm6, %xmm15 +vpsrld $25, %xmm6, %xmm6 +vpxor %xmm6, %xmm15, %xmm6 +vpxor %xmm7, %xmm8, %xmm7 +vpxor %xmm4, %xmm9, %xmm4 +vpslld $ 7, %xmm7, %xmm15 +vpsrld $25, %xmm7, %xmm7 +vpxor %xmm7, %xmm15, %xmm7 +vpslld $ 7, %xmm4, %xmm15 +vpsrld $25, %xmm4, %xmm4 +vpxor 
%xmm4, %xmm15, %xmm4 +vmovdqa 112(%rsp), %xmm15 +subq $2, %rax +jnz chacha_blocks_avx_mainloop1 +vpaddd 128(%rsp), %xmm0, %xmm0 +vpaddd 144(%rsp), %xmm1, %xmm1 +vpaddd 160(%rsp), %xmm2, %xmm2 +vpaddd 176(%rsp), %xmm3, %xmm3 +vpaddd 192(%rsp), %xmm4, %xmm4 +vpaddd 208(%rsp), %xmm5, %xmm5 +vpaddd 224(%rsp), %xmm6, %xmm6 +vpaddd 240(%rsp), %xmm7, %xmm7 +vpaddd 256(%rsp), %xmm8, %xmm8 +vpaddd 272(%rsp), %xmm9, %xmm9 +vpaddd 288(%rsp), %xmm10, %xmm10 +vpaddd 304(%rsp), %xmm11, %xmm11 +vpaddd 320(%rsp), %xmm12, %xmm12 +vpaddd 336(%rsp), %xmm13, %xmm13 +vpaddd 352(%rsp), %xmm14, %xmm14 +vpaddd 368(%rsp), %xmm15, %xmm15 +vmovdqa %xmm8, 384(%rsp) +vmovdqa %xmm9, 400(%rsp) +vmovdqa %xmm10, 416(%rsp) +vmovdqa %xmm11, 432(%rsp) +vmovdqa %xmm12, 448(%rsp) +vmovdqa %xmm13, 464(%rsp) +vmovdqa %xmm14, 480(%rsp) +vmovdqa %xmm15, 496(%rsp) +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +andq %rsi, %rsi +jz chacha_blocks_avx_noinput1 +vpxor 0(%rsi), %xmm0, %xmm0 +vpxor 16(%rsi), %xmm1, %xmm1 +vpxor 64(%rsi), %xmm2, %xmm2 +vpxor 80(%rsi), %xmm3, %xmm3 +vpxor 128(%rsi), %xmm4, %xmm4 +vpxor 144(%rsi), %xmm5, %xmm5 +vpxor 192(%rsi), %xmm6, %xmm6 +vpxor 208(%rsi), %xmm7, %xmm7 +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 64(%rdx) +vmovdqu %xmm3, 80(%rdx) +vmovdqu %xmm4, 128(%rdx) +vmovdqu %xmm5, 144(%rdx) +vmovdqu %xmm6, 192(%rdx) +vmovdqu %xmm7, 208(%rdx) +vmovdqa 384(%rsp), %xmm0 +vmovdqa 400(%rsp), %xmm1 +vmovdqa 416(%rsp), %xmm2 +vmovdqa 432(%rsp), %xmm3 +vmovdqa 448(%rsp), %xmm4 
+vmovdqa 464(%rsp), %xmm5 +vmovdqa 480(%rsp), %xmm6 +vmovdqa 496(%rsp), %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +vpxor 32(%rsi), %xmm0, %xmm0 +vpxor 48(%rsi), %xmm1, %xmm1 +vpxor 96(%rsi), %xmm2, %xmm2 +vpxor 112(%rsi), %xmm3, %xmm3 +vpxor 160(%rsi), %xmm4, %xmm4 +vpxor 176(%rsi), %xmm5, %xmm5 +vpxor 224(%rsi), %xmm6, %xmm6 +vpxor 240(%rsi), %xmm7, %xmm7 +vmovdqu %xmm0, 32(%rdx) +vmovdqu %xmm1, 48(%rdx) +vmovdqu %xmm2, 96(%rdx) +vmovdqu %xmm3, 112(%rdx) +vmovdqu %xmm4, 160(%rdx) +vmovdqu %xmm5, 176(%rdx) +vmovdqu %xmm6, 224(%rdx) +vmovdqu %xmm7, 240(%rdx) +addq $256, %rsi +jmp chacha_blocks_avx_mainloop_cont +chacha_blocks_avx_noinput1: +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 64(%rdx) +vmovdqu %xmm3, 80(%rdx) +vmovdqu %xmm4, 128(%rdx) +vmovdqu %xmm5, 144(%rdx) +vmovdqu %xmm6, 192(%rdx) +vmovdqu %xmm7, 208(%rdx) +vmovdqa 384(%rsp), %xmm0 +vmovdqa 400(%rsp), %xmm1 +vmovdqa 416(%rsp), %xmm2 +vmovdqa 432(%rsp), %xmm3 +vmovdqa 448(%rsp), %xmm4 +vmovdqa 464(%rsp), %xmm5 +vmovdqa 480(%rsp), %xmm6 +vmovdqa 496(%rsp), %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 
+vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +vmovdqu %xmm0, 32(%rdx) +vmovdqu %xmm1, 48(%rdx) +vmovdqu %xmm2, 96(%rdx) +vmovdqu %xmm3, 112(%rdx) +vmovdqu %xmm4, 160(%rdx) +vmovdqu %xmm5, 176(%rdx) +vmovdqu %xmm6, 224(%rdx) +vmovdqu %xmm7, 240(%rdx) +chacha_blocks_avx_mainloop_cont: +addq $256, %rdx +subq $256, %rcx +cmp $256, %rcx +jae chacha_blocks_avx_atleast256 +vmovdqa 80(%rsp), %xmm6 +vmovdqa 96(%rsp), %xmm7 +vmovdqa 0(%rsp), %xmm8 +vmovdqa 16(%rsp), %xmm9 +vmovdqa 32(%rsp), %xmm10 +vmovdqa 48(%rsp), %xmm11 +movq $1, %r9 +chacha_blocks_avx_below256: +vmovq %r9, %xmm5 +andq %rcx, %rcx +jz chacha_blocks_avx_done +cmpq $64, %rcx +jae chacha_blocks_avx_above63 +movq %rdx, %r9 +andq %rsi, %rsi +jz chacha_blocks_avx_noinput2 +movq %rcx, %r10 +movq %rsp, %rdx +addq %r10, %rsi +addq %r10, %rdx +negq %r10 +chacha_blocks_avx_copyinput: +movb (%rsi, %r10), %al +movb %al, (%rdx, %r10) +incq %r10 +jnz chacha_blocks_avx_copyinput +movq %rsp, %rsi +chacha_blocks_avx_noinput2: +movq %rsp, %rdx +chacha_blocks_avx_above63: +vmovdqa %xmm8, %xmm0 +vmovdqa %xmm9, %xmm1 +vmovdqa %xmm10, %xmm2 +vmovdqa %xmm11, %xmm3 +movq 64(%rsp), %rax +chacha_blocks_avx_mainloop2: +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm7, %xmm3, %xmm3 +vpshufd $0x93, %xmm0, %xmm0 +vpaddd %xmm2, %xmm3, %xmm2 +vpshufd $0x4e, %xmm3, %xmm3 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x39, %xmm2, %xmm2 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 
+vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm7, %xmm3, %xmm3 +vpshufd $0x39, %xmm0, %xmm0 +vpaddd %xmm2, %xmm3, %xmm2 +vpshufd $0x4e, %xmm3, %xmm3 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x93, %xmm2, %xmm2 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +subq $2, %rax +jnz chacha_blocks_avx_mainloop2 +vpaddd %xmm0, %xmm8, %xmm0 +vpaddd %xmm1, %xmm9, %xmm1 +vpaddd %xmm2, %xmm10, %xmm2 +vpaddd %xmm3, %xmm11, %xmm3 +andq %rsi, %rsi +jz chacha_blocks_avx_noinput3 +vpxor 0(%rsi), %xmm0, %xmm0 +vpxor 16(%rsi), %xmm1, %xmm1 +vpxor 32(%rsi), %xmm2, %xmm2 +vpxor 48(%rsi), %xmm3, %xmm3 +addq $64, %rsi +chacha_blocks_avx_noinput3: +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 32(%rdx) +vmovdqu %xmm3, 48(%rdx) +vpaddq %xmm11, %xmm5, %xmm11 +cmpq $64, %rcx +jbe chacha_blocks_avx_mainloop2_finishup +addq $64, %rdx +subq $64, %rcx +jmp chacha_blocks_avx_below256 +chacha_blocks_avx_mainloop2_finishup: +cmpq $64, %rcx +je chacha_blocks_avx_done +addq %rcx, %r9 +addq %rcx, %rdx +negq %rcx +chacha_blocks_avx_copyoutput: +movb (%rdx, %rcx), %al +movb %al, (%r9, %rcx) +incq %rcx +jnz chacha_blocks_avx_copyoutput +chacha_blocks_avx_done: +vmovdqu %xmm11, 32(%rdi) +movq %rbp, %rsp +popq %rbp +popq %rbx +ret +FN_END chacha_blocks_avx + +GLOBAL_HIDDEN_FN hchacha_avx +hchacha_avx_local: +LOAD_VAR_PIC chacha_constants, %rax +vmovdqa 0(%rax), %xmm0 +vmovdqa 16(%rax), %xmm6 +vmovdqa 32(%rax), %xmm5 +vmovdqu 0(%rdi), %xmm1 +vmovdqu 16(%rdi), %xmm2 +vmovdqu 0(%rsi), %xmm3 +hhacha_mainloop_avx: +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm5, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpshufd $0x93, %xmm0, %xmm0 +vpxor %xmm1, %xmm4, %xmm1 
+vpshufd $0x4e, %xmm3, %xmm3 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpshufd $0x39, %xmm2, %xmm2 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm5, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x39, %xmm0, %xmm0 +vpslld $7, %xmm1, %xmm4 +vpshufd $0x4e, %xmm3, %xmm3 +vpsrld $25, %xmm1, %xmm1 +vpshufd $0x93, %xmm2, %xmm2 +vpxor %xmm1, %xmm4, %xmm1 +subl $2, %ecx +jne hhacha_mainloop_avx +vmovdqu %xmm0, (%rdx) +vmovdqu %xmm3, 16(%rdx) +ret +FN_END hchacha_avx + +GLOBAL_HIDDEN_FN_EXT chacha_avx, 6, 16 +pushq %rbp +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +vmovdqu 0(%rdi), %xmm0 +vmovdqu 16(%rdi), %xmm1 +vmovdqa %xmm0, 0(%rsp) +vmovdqa %xmm1, 16(%rsp) +xorq %rdi, %rdi +movq %rdi, 32(%rsp) +movq 0(%rsi), %rsi +movq %rsi, 40(%rsp) +movq %r9, 48(%rsp) +movq %rsp, %rdi +movq %rdx, %rsi +movq %rcx, %rdx +movq %r8, %rcx +call chacha_blocks_avx_local +vpxor %xmm0, %xmm0, %xmm0 +vmovdqa %xmm0, 0(%rsp) +vmovdqa %xmm0, 16(%rsp) +vmovdqa %xmm0, 32(%rsp) +movq %rbp, %rsp +popq %rbp +ret +FN_END chacha_avx + +GLOBAL_HIDDEN_FN_EXT xchacha_avx, 6, 16 +pushq %rbp +pushq %rbx +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +movq %rsp, %rbx +xorq %rax, %rax +movq %rax, 32(%rbx) +movq 16(%rsi), %rax +movq %rax, 40(%rbx) +movq %r9, 48(%rbx) +pushq %rdx +pushq %rcx +pushq %r8 +movq %rbx, %rdx +movq %r9, %rcx +call hchacha_avx_local +movq %rbx, %rdi +popq %rcx +popq %rdx +popq %rsi +call chacha_blocks_avx_local +vpxor %xmm0, %xmm0, %xmm0 +vmovdqa %xmm0, 0(%rbx) +vmovdqa %xmm0, 16(%rbx) +vmovdqa %xmm0, 32(%rbx) +movq %rbp, %rsp +popq %rbx +popq %rbp +ret +FN_END xchacha_avx diff --git a/src/libcryptobox/chacha20/avx2.S b/src/libcryptobox/chacha20/avx2.S new file mode 100644 index 0000000..efd0f54 --- /dev/null +++ b/src/libcryptobox/chacha20/avx2.S @@ 
-0,0 +1,1018 @@ +#include "../macro.S" +#include "constants.S" +SECTION_TEXT + +GLOBAL_HIDDEN_FN chacha_blocks_avx2 +chacha_blocks_avx2_local: +pushq %rbx +pushq %rbp +pushq %r12 +pushq %r13 +pushq %r14 +movq %rsp, %rbp +andq $~63, %rsp +subq $512, %rsp +LOAD_VAR_PIC chacha_constants, %rax +vmovdqa 0(%rax), %xmm8 +vmovdqa 16(%rax), %xmm6 +vmovdqa 32(%rax), %xmm7 +vmovdqu 0(%rdi), %xmm9 +vmovdqu 16(%rdi), %xmm10 +vmovdqu 32(%rdi), %xmm11 +movq 48(%rdi), %rax +movq $1, %r9 +vmovdqa %xmm8, 0(%rsp) +vmovdqa %xmm9, 16(%rsp) +vmovdqa %xmm10, 32(%rsp) +vmovdqa %xmm11, 48(%rsp) +movq %rax, 64(%rsp) +vmovdqa %xmm6, 448(%rsp) +vmovdqa %xmm6, 464(%rsp) +vmovdqa %xmm7, 480(%rsp) +vmovdqa %xmm7, 496(%rsp) +cmpq $512, %rcx +jae chacha_blocks_avx2_atleast512 +cmp $256, %rcx +jae chacha_blocks_avx2_atleast256 +jmp chacha_blocks_avx2_below256 +.p2align 6,,63 +chacha_blocks_avx2_atleast512: +movq 48(%rsp), %rax +leaq 1(%rax), %r8 +leaq 2(%rax), %r9 +leaq 3(%rax), %r10 +leaq 4(%rax), %rbx +leaq 5(%rax), %r11 +leaq 6(%rax), %r12 +leaq 7(%rax), %r13 +leaq 8(%rax), %r14 +movl %eax, 128(%rsp) +movl %r8d, 4+128(%rsp) +movl %r9d, 8+128(%rsp) +movl %r10d, 12+128(%rsp) +movl %ebx, 16+128(%rsp) +movl %r11d, 20+128(%rsp) +movl %r12d, 24+128(%rsp) +movl %r13d, 28+128(%rsp) +shrq $32, %rax +shrq $32, %r8 +shrq $32, %r9 +shrq $32, %r10 +shrq $32, %rbx +shrq $32, %r11 +shrq $32, %r12 +shrq $32, %r13 +movl %eax, 160(%rsp) +movl %r8d, 4+160(%rsp) +movl %r9d, 8+160(%rsp) +movl %r10d, 12+160(%rsp) +movl %ebx, 16+160(%rsp) +movl %r11d, 20+160(%rsp) +movl %r12d, 24+160(%rsp) +movl %r13d, 28+160(%rsp) +movq %r14, 48(%rsp) +movq 64(%rsp), %rax +vpbroadcastd 0(%rsp), %ymm0 +vpbroadcastd 4+0(%rsp), %ymm1 +vpbroadcastd 8+0(%rsp), %ymm2 +vpbroadcastd 12+0(%rsp), %ymm3 +vpbroadcastd 16(%rsp), %ymm4 +vpbroadcastd 4+16(%rsp), %ymm5 +vpbroadcastd 8+16(%rsp), %ymm6 +vpbroadcastd 12+16(%rsp), %ymm7 +vpbroadcastd 32(%rsp), %ymm8 +vpbroadcastd 4+32(%rsp), %ymm9 +vpbroadcastd 8+32(%rsp), %ymm10 +vpbroadcastd 
12+32(%rsp), %ymm11 +vpbroadcastd 8+48(%rsp), %ymm14 +vpbroadcastd 12+48(%rsp), %ymm15 +vmovdqa 128(%rsp), %ymm12 +vmovdqa 160(%rsp), %ymm13 +chacha_blocks_avx2_mainloop1: +vpaddd %ymm0, %ymm4, %ymm0 +vpaddd %ymm1, %ymm5, %ymm1 +vpxor %ymm12, %ymm0, %ymm12 +vpxor %ymm13, %ymm1, %ymm13 +vpaddd %ymm2, %ymm6, %ymm2 +vpaddd %ymm3, %ymm7, %ymm3 +vpxor %ymm14, %ymm2, %ymm14 +vpxor %ymm15, %ymm3, %ymm15 +vpshufb 448(%rsp), %ymm12, %ymm12 +vpshufb 448(%rsp), %ymm13, %ymm13 +vpaddd %ymm8, %ymm12, %ymm8 +vpaddd %ymm9, %ymm13, %ymm9 +vpshufb 448(%rsp), %ymm14, %ymm14 +vpshufb 448(%rsp), %ymm15, %ymm15 +vpaddd %ymm10, %ymm14, %ymm10 +vpaddd %ymm11, %ymm15, %ymm11 +vmovdqa %ymm12, 96(%rsp) +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm5, %ymm9, %ymm5 +vpslld $ 12, %ymm4, %ymm12 +vpsrld $20, %ymm4, %ymm4 +vpxor %ymm4, %ymm12, %ymm4 +vpslld $ 12, %ymm5, %ymm12 +vpsrld $20, %ymm5, %ymm5 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm6, %ymm10, %ymm6 +vpxor %ymm7, %ymm11, %ymm7 +vpslld $ 12, %ymm6, %ymm12 +vpsrld $20, %ymm6, %ymm6 +vpxor %ymm6, %ymm12, %ymm6 +vpslld $ 12, %ymm7, %ymm12 +vpsrld $20, %ymm7, %ymm7 +vpxor %ymm7, %ymm12, %ymm7 +vpaddd %ymm0, %ymm4, %ymm0 +vpaddd %ymm1, %ymm5, %ymm1 +vpxor 96(%rsp), %ymm0, %ymm12 +vpxor %ymm13, %ymm1, %ymm13 +vpaddd %ymm2, %ymm6, %ymm2 +vpaddd %ymm3, %ymm7, %ymm3 +vpxor %ymm14, %ymm2, %ymm14 +vpxor %ymm15, %ymm3, %ymm15 +vpshufb 480(%rsp), %ymm12, %ymm12 +vpshufb 480(%rsp), %ymm13, %ymm13 +vpaddd %ymm8, %ymm12, %ymm8 +vpaddd %ymm9, %ymm13, %ymm9 +vpshufb 480(%rsp), %ymm14, %ymm14 +vpshufb 480(%rsp), %ymm15, %ymm15 +vpaddd %ymm10, %ymm14, %ymm10 +vpaddd %ymm11, %ymm15, %ymm11 +vmovdqa %ymm12, 96(%rsp) +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm5, %ymm9, %ymm5 +vpslld $ 7, %ymm4, %ymm12 +vpsrld $25, %ymm4, %ymm4 +vpxor %ymm4, %ymm12, %ymm4 +vpslld $ 7, %ymm5, %ymm12 +vpsrld $25, %ymm5, %ymm5 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm6, %ymm10, %ymm6 +vpxor %ymm7, %ymm11, %ymm7 +vpslld $ 7, %ymm6, %ymm12 +vpsrld $25, %ymm6, %ymm6 +vpxor %ymm6, %ymm12, %ymm6 
+vpslld $ 7, %ymm7, %ymm12 +vpsrld $25, %ymm7, %ymm7 +vpxor %ymm7, %ymm12, %ymm7 +vpaddd %ymm0, %ymm5, %ymm0 +vpaddd %ymm1, %ymm6, %ymm1 +vpxor %ymm15, %ymm0, %ymm15 +vpxor 96(%rsp), %ymm1, %ymm12 +vpaddd %ymm2, %ymm7, %ymm2 +vpaddd %ymm3, %ymm4, %ymm3 +vpxor %ymm13, %ymm2, %ymm13 +vpxor %ymm14, %ymm3, %ymm14 +vpshufb 448(%rsp), %ymm15, %ymm15 +vpshufb 448(%rsp), %ymm12, %ymm12 +vpaddd %ymm10, %ymm15, %ymm10 +vpaddd %ymm11, %ymm12, %ymm11 +vpshufb 448(%rsp), %ymm13, %ymm13 +vpshufb 448(%rsp), %ymm14, %ymm14 +vpaddd %ymm8, %ymm13, %ymm8 +vpaddd %ymm9, %ymm14, %ymm9 +vmovdqa %ymm15, 96(%rsp) +vpxor %ymm5, %ymm10, %ymm5 +vpxor %ymm6, %ymm11, %ymm6 +vpslld $ 12, %ymm5, %ymm15 +vpsrld $20, %ymm5, %ymm5 +vpxor %ymm5, %ymm15, %ymm5 +vpslld $ 12, %ymm6, %ymm15 +vpsrld $20, %ymm6, %ymm6 +vpxor %ymm6, %ymm15, %ymm6 +vpxor %ymm7, %ymm8, %ymm7 +vpxor %ymm4, %ymm9, %ymm4 +vpslld $ 12, %ymm7, %ymm15 +vpsrld $20, %ymm7, %ymm7 +vpxor %ymm7, %ymm15, %ymm7 +vpslld $ 12, %ymm4, %ymm15 +vpsrld $20, %ymm4, %ymm4 +vpxor %ymm4, %ymm15, %ymm4 +vpaddd %ymm0, %ymm5, %ymm0 +vpaddd %ymm1, %ymm6, %ymm1 +vpxor 96(%rsp), %ymm0, %ymm15 +vpxor %ymm12, %ymm1, %ymm12 +vpaddd %ymm2, %ymm7, %ymm2 +vpaddd %ymm3, %ymm4, %ymm3 +vpxor %ymm13, %ymm2, %ymm13 +vpxor %ymm14, %ymm3, %ymm14 +vpshufb 480(%rsp), %ymm15, %ymm15 +vpshufb 480(%rsp), %ymm12, %ymm12 +vpaddd %ymm10, %ymm15, %ymm10 +vpaddd %ymm11, %ymm12, %ymm11 +vpshufb 480(%rsp), %ymm13, %ymm13 +vpshufb 480(%rsp), %ymm14, %ymm14 +vpaddd %ymm8, %ymm13, %ymm8 +vpaddd %ymm9, %ymm14, %ymm9 +vmovdqa %ymm15, 96(%rsp) +vpxor %ymm5, %ymm10, %ymm5 +vpxor %ymm6, %ymm11, %ymm6 +vpslld $ 7, %ymm5, %ymm15 +vpsrld $25, %ymm5, %ymm5 +vpxor %ymm5, %ymm15, %ymm5 +vpslld $ 7, %ymm6, %ymm15 +vpsrld $25, %ymm6, %ymm6 +vpxor %ymm6, %ymm15, %ymm6 +vpxor %ymm7, %ymm8, %ymm7 +vpxor %ymm4, %ymm9, %ymm4 +vpslld $ 7, %ymm7, %ymm15 +vpsrld $25, %ymm7, %ymm7 +vpxor %ymm7, %ymm15, %ymm7 +vpslld $ 7, %ymm4, %ymm15 +vpsrld $25, %ymm4, %ymm4 +vpxor %ymm4, %ymm15, %ymm4 +vmovdqa 
96(%rsp), %ymm15 +subq $2, %rax +jnz chacha_blocks_avx2_mainloop1 +vmovdqa %ymm8, 192(%rsp) +vmovdqa %ymm9, 224(%rsp) +vmovdqa %ymm10, 256(%rsp) +vmovdqa %ymm11, 288(%rsp) +vmovdqa %ymm12, 320(%rsp) +vmovdqa %ymm13, 352(%rsp) +vmovdqa %ymm14, 384(%rsp) +vmovdqa %ymm15, 416(%rsp) +vpbroadcastd 0(%rsp), %ymm8 +vpbroadcastd 4+0(%rsp), %ymm9 +vpbroadcastd 8+0(%rsp), %ymm10 +vpbroadcastd 12+0(%rsp), %ymm11 +vpbroadcastd 16(%rsp), %ymm12 +vpbroadcastd 4+16(%rsp), %ymm13 +vpbroadcastd 8+16(%rsp), %ymm14 +vpbroadcastd 12+16(%rsp), %ymm15 +vpaddd %ymm8, %ymm0, %ymm0 +vpaddd %ymm9, %ymm1, %ymm1 +vpaddd %ymm10, %ymm2, %ymm2 +vpaddd %ymm11, %ymm3, %ymm3 +vpaddd %ymm12, %ymm4, %ymm4 +vpaddd %ymm13, %ymm5, %ymm5 +vpaddd %ymm14, %ymm6, %ymm6 +vpaddd %ymm15, %ymm7, %ymm7 +vpunpckldq %ymm1, %ymm0, %ymm8 +vpunpckldq %ymm3, %ymm2, %ymm9 +vpunpckhdq %ymm1, %ymm0, %ymm12 +vpunpckhdq %ymm3, %ymm2, %ymm13 +vpunpckldq %ymm5, %ymm4, %ymm10 +vpunpckldq %ymm7, %ymm6, %ymm11 +vpunpckhdq %ymm5, %ymm4, %ymm14 +vpunpckhdq %ymm7, %ymm6, %ymm15 +vpunpcklqdq %ymm9, %ymm8, %ymm0 +vpunpcklqdq %ymm11, %ymm10, %ymm1 +vpunpckhqdq %ymm9, %ymm8, %ymm2 +vpunpckhqdq %ymm11, %ymm10, %ymm3 +vpunpcklqdq %ymm13, %ymm12, %ymm4 +vpunpcklqdq %ymm15, %ymm14, %ymm5 +vpunpckhqdq %ymm13, %ymm12, %ymm6 +vpunpckhqdq %ymm15, %ymm14, %ymm7 +vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 +vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 +vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 +vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 +vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 +vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 +vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 +vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput1 +vpxor 0(%rsi), %ymm8, %ymm8 +vpxor 64(%rsi), %ymm9, %ymm9 +vpxor 128(%rsi), %ymm10, %ymm10 +vpxor 192(%rsi), %ymm11, %ymm11 +vpxor 256(%rsi), %ymm12, %ymm12 +vpxor 320(%rsi), %ymm13, %ymm13 +vpxor 384(%rsi), %ymm14, %ymm14 +vpxor 448(%rsi), %ymm15, %ymm15 +vmovdqu %ymm8, 0(%rdx) +vmovdqu %ymm9, 64(%rdx) +vmovdqu %ymm10, 
128(%rdx) +vmovdqu %ymm11, 192(%rdx) +vmovdqu %ymm12, 256(%rdx) +vmovdqu %ymm13, 320(%rdx) +vmovdqu %ymm14, 384(%rdx) +vmovdqu %ymm15, 448(%rdx) +vmovdqa 192(%rsp), %ymm0 +vmovdqa 224(%rsp), %ymm1 +vmovdqa 256(%rsp), %ymm2 +vmovdqa 288(%rsp), %ymm3 +vmovdqa 320(%rsp), %ymm4 +vmovdqa 352(%rsp), %ymm5 +vmovdqa 384(%rsp), %ymm6 +vmovdqa 416(%rsp), %ymm7 +vpbroadcastd 32(%rsp), %ymm8 +vpbroadcastd 4+32(%rsp), %ymm9 +vpbroadcastd 8+32(%rsp), %ymm10 +vpbroadcastd 12+32(%rsp), %ymm11 +vmovdqa 128(%rsp), %ymm12 +vmovdqa 160(%rsp), %ymm13 +vpbroadcastd 8+48(%rsp), %ymm14 +vpbroadcastd 12+48(%rsp), %ymm15 +vpaddd %ymm8, %ymm0, %ymm0 +vpaddd %ymm9, %ymm1, %ymm1 +vpaddd %ymm10, %ymm2, %ymm2 +vpaddd %ymm11, %ymm3, %ymm3 +vpaddd %ymm12, %ymm4, %ymm4 +vpaddd %ymm13, %ymm5, %ymm5 +vpaddd %ymm14, %ymm6, %ymm6 +vpaddd %ymm15, %ymm7, %ymm7 +vpunpckldq %ymm1, %ymm0, %ymm8 +vpunpckldq %ymm3, %ymm2, %ymm9 +vpunpckhdq %ymm1, %ymm0, %ymm12 +vpunpckhdq %ymm3, %ymm2, %ymm13 +vpunpckldq %ymm5, %ymm4, %ymm10 +vpunpckldq %ymm7, %ymm6, %ymm11 +vpunpckhdq %ymm5, %ymm4, %ymm14 +vpunpckhdq %ymm7, %ymm6, %ymm15 +vpunpcklqdq %ymm9, %ymm8, %ymm0 +vpunpcklqdq %ymm11, %ymm10, %ymm1 +vpunpckhqdq %ymm9, %ymm8, %ymm2 +vpunpckhqdq %ymm11, %ymm10, %ymm3 +vpunpcklqdq %ymm13, %ymm12, %ymm4 +vpunpcklqdq %ymm15, %ymm14, %ymm5 +vpunpckhqdq %ymm13, %ymm12, %ymm6 +vpunpckhqdq %ymm15, %ymm14, %ymm7 +vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 +vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 +vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 +vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 +vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 +vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 +vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 +vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 +vpxor 32(%rsi), %ymm8, %ymm8 +vpxor 96(%rsi), %ymm9, %ymm9 +vpxor 160(%rsi), %ymm10, %ymm10 +vpxor 224(%rsi), %ymm11, %ymm11 +vpxor 288(%rsi), %ymm12, %ymm12 +vpxor 352(%rsi), %ymm13, %ymm13 +vpxor 416(%rsi), %ymm14, %ymm14 +vpxor 480(%rsi), %ymm15, %ymm15 +vmovdqu %ymm8, 32(%rdx) +vmovdqu %ymm9, 96(%rdx) 
+vmovdqu %ymm10, 160(%rdx) +vmovdqu %ymm11, 224(%rdx) +vmovdqu %ymm12, 288(%rdx) +vmovdqu %ymm13, 352(%rdx) +vmovdqu %ymm14, 416(%rdx) +vmovdqu %ymm15, 480(%rdx) +addq $512, %rsi +jmp chacha_blocks_avx2_mainloop1_cont +chacha_blocks_avx2_noinput1: +vmovdqu %ymm8, 0(%rdx) +vmovdqu %ymm9, 64(%rdx) +vmovdqu %ymm10, 128(%rdx) +vmovdqu %ymm11, 192(%rdx) +vmovdqu %ymm12, 256(%rdx) +vmovdqu %ymm13, 320(%rdx) +vmovdqu %ymm14, 384(%rdx) +vmovdqu %ymm15, 448(%rdx) +vmovdqa 192(%rsp), %ymm0 +vmovdqa 224(%rsp), %ymm1 +vmovdqa 256(%rsp), %ymm2 +vmovdqa 288(%rsp), %ymm3 +vmovdqa 320(%rsp), %ymm4 +vmovdqa 352(%rsp), %ymm5 +vmovdqa 384(%rsp), %ymm6 +vmovdqa 416(%rsp), %ymm7 +vpbroadcastd 32(%rsp), %ymm8 +vpbroadcastd 4+32(%rsp), %ymm9 +vpbroadcastd 8+32(%rsp), %ymm10 +vpbroadcastd 12+32(%rsp), %ymm11 +vmovdqa 128(%rsp), %ymm12 +vmovdqa 160(%rsp), %ymm13 +vpbroadcastd 8+48(%rsp), %ymm14 +vpbroadcastd 12+48(%rsp), %ymm15 +vpaddd %ymm8, %ymm0, %ymm0 +vpaddd %ymm9, %ymm1, %ymm1 +vpaddd %ymm10, %ymm2, %ymm2 +vpaddd %ymm11, %ymm3, %ymm3 +vpaddd %ymm12, %ymm4, %ymm4 +vpaddd %ymm13, %ymm5, %ymm5 +vpaddd %ymm14, %ymm6, %ymm6 +vpaddd %ymm15, %ymm7, %ymm7 +vpunpckldq %ymm1, %ymm0, %ymm8 +vpunpckldq %ymm3, %ymm2, %ymm9 +vpunpckhdq %ymm1, %ymm0, %ymm12 +vpunpckhdq %ymm3, %ymm2, %ymm13 +vpunpckldq %ymm5, %ymm4, %ymm10 +vpunpckldq %ymm7, %ymm6, %ymm11 +vpunpckhdq %ymm5, %ymm4, %ymm14 +vpunpckhdq %ymm7, %ymm6, %ymm15 +vpunpcklqdq %ymm9, %ymm8, %ymm0 +vpunpcklqdq %ymm11, %ymm10, %ymm1 +vpunpckhqdq %ymm9, %ymm8, %ymm2 +vpunpckhqdq %ymm11, %ymm10, %ymm3 +vpunpcklqdq %ymm13, %ymm12, %ymm4 +vpunpcklqdq %ymm15, %ymm14, %ymm5 +vpunpckhqdq %ymm13, %ymm12, %ymm6 +vpunpckhqdq %ymm15, %ymm14, %ymm7 +vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 +vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 +vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 +vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 +vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 +vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 +vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 +vperm2i128 $0x31, %ymm7, %ymm6, 
%ymm15 +vmovdqu %ymm8, 32(%rdx) +vmovdqu %ymm9, 96(%rdx) +vmovdqu %ymm10, 160(%rdx) +vmovdqu %ymm11, 224(%rdx) +vmovdqu %ymm12, 288(%rdx) +vmovdqu %ymm13, 352(%rdx) +vmovdqu %ymm14, 416(%rdx) +vmovdqu %ymm15, 480(%rdx) +chacha_blocks_avx2_mainloop1_cont: +addq $512, %rdx +subq $512, %rcx +cmp $512, %rcx +jae chacha_blocks_avx2_atleast512 +cmp $256, %rcx +jb chacha_blocks_avx2_below256_fixup +chacha_blocks_avx2_atleast256: +movq 48(%rsp), %rax +leaq 1(%rax), %r8 +leaq 2(%rax), %r9 +leaq 3(%rax), %r10 +leaq 4(%rax), %rbx +movl %eax, 128(%rsp) +movl %r8d, 4+128(%rsp) +movl %r9d, 8+128(%rsp) +movl %r10d, 12+128(%rsp) +shrq $32, %rax +shrq $32, %r8 +shrq $32, %r9 +shrq $32, %r10 +movl %eax, 160(%rsp) +movl %r8d, 4+160(%rsp) +movl %r9d, 8+160(%rsp) +movl %r10d, 12+160(%rsp) +movq %rbx, 48(%rsp) +movq 64(%rsp), %rax +vpbroadcastd 0(%rsp), %xmm0 +vpbroadcastd 4+0(%rsp), %xmm1 +vpbroadcastd 8+0(%rsp), %xmm2 +vpbroadcastd 12+0(%rsp), %xmm3 +vpbroadcastd 16(%rsp), %xmm4 +vpbroadcastd 4+16(%rsp), %xmm5 +vpbroadcastd 8+16(%rsp), %xmm6 +vpbroadcastd 12+16(%rsp), %xmm7 +vpbroadcastd 32(%rsp), %xmm8 +vpbroadcastd 4+32(%rsp), %xmm9 +vpbroadcastd 8+32(%rsp), %xmm10 +vpbroadcastd 12+32(%rsp), %xmm11 +vmovdqa 128(%rsp), %xmm12 +vmovdqa 160(%rsp), %xmm13 +vpbroadcastd 8+48(%rsp), %xmm14 +vpbroadcastd 12+48(%rsp), %xmm15 +chacha_blocks_avx2_mainloop2: +vpaddd %xmm0, %xmm4, %xmm0 +vpaddd %xmm1, %xmm5, %xmm1 +vpxor %xmm12, %xmm0, %xmm12 +vpxor %xmm13, %xmm1, %xmm13 +vpaddd %xmm2, %xmm6, %xmm2 +vpaddd %xmm3, %xmm7, %xmm3 +vpxor %xmm14, %xmm2, %xmm14 +vpxor %xmm15, %xmm3, %xmm15 +vpshufb 448(%rsp), %xmm12, %xmm12 +vpshufb 448(%rsp), %xmm13, %xmm13 +vpaddd %xmm8, %xmm12, %xmm8 +vpaddd %xmm9, %xmm13, %xmm9 +vpshufb 448(%rsp), %xmm14, %xmm14 +vpshufb 448(%rsp), %xmm15, %xmm15 +vpaddd %xmm10, %xmm14, %xmm10 +vpaddd %xmm11, %xmm15, %xmm11 +vmovdqa %xmm12, 96(%rsp) +vpxor %xmm4, %xmm8, %xmm4 +vpxor %xmm5, %xmm9, %xmm5 +vpslld $ 12, %xmm4, %xmm12 +vpsrld $20, %xmm4, %xmm4 +vpxor %xmm4, %xmm12, 
%xmm4 +vpslld $ 12, %xmm5, %xmm12 +vpsrld $20, %xmm5, %xmm5 +vpxor %xmm5, %xmm12, %xmm5 +vpxor %xmm6, %xmm10, %xmm6 +vpxor %xmm7, %xmm11, %xmm7 +vpslld $ 12, %xmm6, %xmm12 +vpsrld $20, %xmm6, %xmm6 +vpxor %xmm6, %xmm12, %xmm6 +vpslld $ 12, %xmm7, %xmm12 +vpsrld $20, %xmm7, %xmm7 +vpxor %xmm7, %xmm12, %xmm7 +vpaddd %xmm0, %xmm4, %xmm0 +vpaddd %xmm1, %xmm5, %xmm1 +vpxor 96(%rsp), %xmm0, %xmm12 +vpxor %xmm13, %xmm1, %xmm13 +vpaddd %xmm2, %xmm6, %xmm2 +vpaddd %xmm3, %xmm7, %xmm3 +vpxor %xmm14, %xmm2, %xmm14 +vpxor %xmm15, %xmm3, %xmm15 +vpshufb 480(%rsp), %xmm12, %xmm12 +vpshufb 480(%rsp), %xmm13, %xmm13 +vpaddd %xmm8, %xmm12, %xmm8 +vpaddd %xmm9, %xmm13, %xmm9 +vpshufb 480(%rsp), %xmm14, %xmm14 +vpshufb 480(%rsp), %xmm15, %xmm15 +vpaddd %xmm10, %xmm14, %xmm10 +vpaddd %xmm11, %xmm15, %xmm11 +vmovdqa %xmm12, 96(%rsp) +vpxor %xmm4, %xmm8, %xmm4 +vpxor %xmm5, %xmm9, %xmm5 +vpslld $ 7, %xmm4, %xmm12 +vpsrld $25, %xmm4, %xmm4 +vpxor %xmm4, %xmm12, %xmm4 +vpslld $ 7, %xmm5, %xmm12 +vpsrld $25, %xmm5, %xmm5 +vpxor %xmm5, %xmm12, %xmm5 +vpxor %xmm6, %xmm10, %xmm6 +vpxor %xmm7, %xmm11, %xmm7 +vpslld $ 7, %xmm6, %xmm12 +vpsrld $25, %xmm6, %xmm6 +vpxor %xmm6, %xmm12, %xmm6 +vpslld $ 7, %xmm7, %xmm12 +vpsrld $25, %xmm7, %xmm7 +vpxor %xmm7, %xmm12, %xmm7 +vpaddd %xmm0, %xmm5, %xmm0 +vpaddd %xmm1, %xmm6, %xmm1 +vpxor %xmm15, %xmm0, %xmm15 +vpxor 96(%rsp), %xmm1, %xmm12 +vpaddd %xmm2, %xmm7, %xmm2 +vpaddd %xmm3, %xmm4, %xmm3 +vpxor %xmm13, %xmm2, %xmm13 +vpxor %xmm14, %xmm3, %xmm14 +vpshufb 448(%rsp), %xmm15, %xmm15 +vpshufb 448(%rsp), %xmm12, %xmm12 +vpaddd %xmm10, %xmm15, %xmm10 +vpaddd %xmm11, %xmm12, %xmm11 +vpshufb 448(%rsp), %xmm13, %xmm13 +vpshufb 448(%rsp), %xmm14, %xmm14 +vpaddd %xmm8, %xmm13, %xmm8 +vpaddd %xmm9, %xmm14, %xmm9 +vmovdqa %xmm15, 96(%rsp) +vpxor %xmm5, %xmm10, %xmm5 +vpxor %xmm6, %xmm11, %xmm6 +vpslld $ 12, %xmm5, %xmm15 +vpsrld $20, %xmm5, %xmm5 +vpxor %xmm5, %xmm15, %xmm5 +vpslld $ 12, %xmm6, %xmm15 +vpsrld $20, %xmm6, %xmm6 +vpxor %xmm6, %xmm15, %xmm6 
+vpxor %xmm7, %xmm8, %xmm7 +vpxor %xmm4, %xmm9, %xmm4 +vpslld $ 12, %xmm7, %xmm15 +vpsrld $20, %xmm7, %xmm7 +vpxor %xmm7, %xmm15, %xmm7 +vpslld $ 12, %xmm4, %xmm15 +vpsrld $20, %xmm4, %xmm4 +vpxor %xmm4, %xmm15, %xmm4 +vpaddd %xmm0, %xmm5, %xmm0 +vpaddd %xmm1, %xmm6, %xmm1 +vpxor 96(%rsp), %xmm0, %xmm15 +vpxor %xmm12, %xmm1, %xmm12 +vpaddd %xmm2, %xmm7, %xmm2 +vpaddd %xmm3, %xmm4, %xmm3 +vpxor %xmm13, %xmm2, %xmm13 +vpxor %xmm14, %xmm3, %xmm14 +vpshufb 480(%rsp), %xmm15, %xmm15 +vpshufb 480(%rsp), %xmm12, %xmm12 +vpaddd %xmm10, %xmm15, %xmm10 +vpaddd %xmm11, %xmm12, %xmm11 +vpshufb 480(%rsp), %xmm13, %xmm13 +vpshufb 480(%rsp), %xmm14, %xmm14 +vpaddd %xmm8, %xmm13, %xmm8 +vpaddd %xmm9, %xmm14, %xmm9 +vmovdqa %xmm15, 96(%rsp) +vpxor %xmm5, %xmm10, %xmm5 +vpxor %xmm6, %xmm11, %xmm6 +vpslld $ 7, %xmm5, %xmm15 +vpsrld $25, %xmm5, %xmm5 +vpxor %xmm5, %xmm15, %xmm5 +vpslld $ 7, %xmm6, %xmm15 +vpsrld $25, %xmm6, %xmm6 +vpxor %xmm6, %xmm15, %xmm6 +vpxor %xmm7, %xmm8, %xmm7 +vpxor %xmm4, %xmm9, %xmm4 +vpslld $ 7, %xmm7, %xmm15 +vpsrld $25, %xmm7, %xmm7 +vpxor %xmm7, %xmm15, %xmm7 +vpslld $ 7, %xmm4, %xmm15 +vpsrld $25, %xmm4, %xmm4 +vpxor %xmm4, %xmm15, %xmm4 +vmovdqa 96(%rsp), %xmm15 +subq $2, %rax +jnz chacha_blocks_avx2_mainloop2 +vmovdqa %xmm8, 192(%rsp) +vmovdqa %xmm9, 208(%rsp) +vmovdqa %xmm10, 224(%rsp) +vmovdqa %xmm11, 240(%rsp) +vmovdqa %xmm12, 256(%rsp) +vmovdqa %xmm13, 272(%rsp) +vmovdqa %xmm14, 288(%rsp) +vmovdqa %xmm15, 304(%rsp) +vpbroadcastd 0(%rsp), %xmm8 +vpbroadcastd 4+0(%rsp), %xmm9 +vpbroadcastd 8+0(%rsp), %xmm10 +vpbroadcastd 12+0(%rsp), %xmm11 +vpbroadcastd 16(%rsp), %xmm12 +vpbroadcastd 4+16(%rsp), %xmm13 +vpbroadcastd 8+16(%rsp), %xmm14 +vpbroadcastd 12+16(%rsp), %xmm15 +vpaddd %xmm8, %xmm0, %xmm0 +vpaddd %xmm9, %xmm1, %xmm1 +vpaddd %xmm10, %xmm2, %xmm2 +vpaddd %xmm11, %xmm3, %xmm3 +vpaddd %xmm12, %xmm4, %xmm4 +vpaddd %xmm13, %xmm5, %xmm5 +vpaddd %xmm14, %xmm6, %xmm6 +vpaddd %xmm15, %xmm7, %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, 
%xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput2 +vpxor 0(%rsi), %xmm0, %xmm0 +vpxor 16(%rsi), %xmm1, %xmm1 +vpxor 64(%rsi), %xmm2, %xmm2 +vpxor 80(%rsi), %xmm3, %xmm3 +vpxor 128(%rsi), %xmm4, %xmm4 +vpxor 144(%rsi), %xmm5, %xmm5 +vpxor 192(%rsi), %xmm6, %xmm6 +vpxor 208(%rsi), %xmm7, %xmm7 +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 64(%rdx) +vmovdqu %xmm3, 80(%rdx) +vmovdqu %xmm4, 128(%rdx) +vmovdqu %xmm5, 144(%rdx) +vmovdqu %xmm6, 192(%rdx) +vmovdqu %xmm7, 208(%rdx) +vmovdqa 192(%rsp), %xmm0 +vmovdqa 208(%rsp), %xmm1 +vmovdqa 224(%rsp), %xmm2 +vmovdqa 240(%rsp), %xmm3 +vmovdqa 256(%rsp), %xmm4 +vmovdqa 272(%rsp), %xmm5 +vmovdqa 288(%rsp), %xmm6 +vmovdqa 304(%rsp), %xmm7 +vpbroadcastd 32(%rsp), %xmm8 +vpbroadcastd 4+32(%rsp), %xmm9 +vpbroadcastd 8+32(%rsp), %xmm10 +vpbroadcastd 12+32(%rsp), %xmm11 +vmovdqa 128(%rsp), %xmm12 +vmovdqa 160(%rsp), %xmm13 +vpbroadcastd 8+48(%rsp), %xmm14 +vpbroadcastd 12+48(%rsp), %xmm15 +vpaddd %xmm8, %xmm0, %xmm0 +vpaddd %xmm9, %xmm1, %xmm1 +vpaddd %xmm10, %xmm2, %xmm2 +vpaddd %xmm11, %xmm3, %xmm3 +vpaddd %xmm12, %xmm4, %xmm4 +vpaddd %xmm13, %xmm5, %xmm5 +vpaddd %xmm14, %xmm6, %xmm6 +vpaddd %xmm15, %xmm7, %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 
+vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +vpxor 32(%rsi), %xmm0, %xmm0 +vpxor 48(%rsi), %xmm1, %xmm1 +vpxor 96(%rsi), %xmm2, %xmm2 +vpxor 112(%rsi), %xmm3, %xmm3 +vpxor 160(%rsi), %xmm4, %xmm4 +vpxor 176(%rsi), %xmm5, %xmm5 +vpxor 224(%rsi), %xmm6, %xmm6 +vpxor 240(%rsi), %xmm7, %xmm7 +vmovdqu %xmm0, 32(%rdx) +vmovdqu %xmm1, 48(%rdx) +vmovdqu %xmm2, 96(%rdx) +vmovdqu %xmm3, 112(%rdx) +vmovdqu %xmm4, 160(%rdx) +vmovdqu %xmm5, 176(%rdx) +vmovdqu %xmm6, 224(%rdx) +vmovdqu %xmm7, 240(%rdx) +addq $256, %rsi +jmp chacha_blocks_avx2_mainloop2_cont +chacha_blocks_avx2_noinput2: +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 64(%rdx) +vmovdqu %xmm3, 80(%rdx) +vmovdqu %xmm4, 128(%rdx) +vmovdqu %xmm5, 144(%rdx) +vmovdqu %xmm6, 192(%rdx) +vmovdqu %xmm7, 208(%rdx) +vmovdqa 192(%rsp), %xmm0 +vmovdqa 208(%rsp), %xmm1 +vmovdqa 224(%rsp), %xmm2 +vmovdqa 240(%rsp), %xmm3 +vmovdqa 256(%rsp), %xmm4 +vmovdqa 272(%rsp), %xmm5 +vmovdqa 288(%rsp), %xmm6 +vmovdqa 304(%rsp), %xmm7 +vpbroadcastd 32(%rsp), %xmm8 +vpbroadcastd 4+32(%rsp), %xmm9 +vpbroadcastd 8+32(%rsp), %xmm10 +vpbroadcastd 12+32(%rsp), %xmm11 +vmovdqa 128(%rsp), %xmm12 +vmovdqa 160(%rsp), %xmm13 +vpbroadcastd 8+48(%rsp), %xmm14 +vpbroadcastd 12+48(%rsp), %xmm15 +vpaddd %xmm8, %xmm0, %xmm0 +vpaddd %xmm9, %xmm1, %xmm1 +vpaddd %xmm10, %xmm2, %xmm2 +vpaddd %xmm11, %xmm3, %xmm3 +vpaddd %xmm12, %xmm4, %xmm4 +vpaddd %xmm13, %xmm5, %xmm5 +vpaddd %xmm14, %xmm6, %xmm6 +vpaddd %xmm15, %xmm7, %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 
+vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +vmovdqu %xmm0, 32(%rdx) +vmovdqu %xmm1, 48(%rdx) +vmovdqu %xmm2, 96(%rdx) +vmovdqu %xmm3, 112(%rdx) +vmovdqu %xmm4, 160(%rdx) +vmovdqu %xmm5, 176(%rdx) +vmovdqu %xmm6, 224(%rdx) +vmovdqu %xmm7, 240(%rdx) +chacha_blocks_avx2_mainloop2_cont: +addq $256, %rdx +subq $256, %rcx +cmp $256, %rcx +jae chacha_blocks_avx2_atleast256 +chacha_blocks_avx2_below256_fixup: +vmovdqa 448(%rsp), %xmm6 +vmovdqa 480(%rsp), %xmm7 +vmovdqa 0(%rsp), %xmm8 +vmovdqa 16(%rsp), %xmm9 +vmovdqa 32(%rsp), %xmm10 +vmovdqa 48(%rsp), %xmm11 +movq $1, %r9 +chacha_blocks_avx2_below256: +vmovq %r9, %xmm5 +andq %rcx, %rcx +jz chacha_blocks_avx2_done +cmpq $64, %rcx +jae chacha_blocks_avx2_above63 +movq %rdx, %r9 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput3 +movq %rcx, %r10 +movq %rsp, %rdx +addq %r10, %rsi +addq %r10, %rdx +negq %r10 +chacha_blocks_avx2_copyinput: +movb (%rsi, %r10), %al +movb %al, (%rdx, %r10) +incq %r10 +jnz chacha_blocks_avx2_copyinput +movq %rsp, %rsi +chacha_blocks_avx2_noinput3: +movq %rsp, %rdx +chacha_blocks_avx2_above63: +vmovdqa %xmm8, %xmm0 +vmovdqa %xmm9, %xmm1 +vmovdqa %xmm10, %xmm2 +vmovdqa %xmm11, %xmm3 +movq 64(%rsp), %rax +chacha_blocks_avx2_mainloop3: +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm7, %xmm3, %xmm3 +vpshufd $0x93, %xmm0, %xmm0 +vpaddd %xmm2, %xmm3, %xmm2 +vpshufd $0x4e, %xmm3, %xmm3 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x39, %xmm2, %xmm2 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, 
%xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm7, %xmm3, %xmm3 +vpshufd $0x39, %xmm0, %xmm0 +vpaddd %xmm2, %xmm3, %xmm2 +vpshufd $0x4e, %xmm3, %xmm3 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x93, %xmm2, %xmm2 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +subq $2, %rax +jnz chacha_blocks_avx2_mainloop3 +vpaddd %xmm0, %xmm8, %xmm0 +vpaddd %xmm1, %xmm9, %xmm1 +vpaddd %xmm2, %xmm10, %xmm2 +vpaddd %xmm3, %xmm11, %xmm3 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput4 +vpxor 0(%rsi), %xmm0, %xmm0 +vpxor 16(%rsi), %xmm1, %xmm1 +vpxor 32(%rsi), %xmm2, %xmm2 +vpxor 48(%rsi), %xmm3, %xmm3 +addq $64, %rsi +chacha_blocks_avx2_noinput4: +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 32(%rdx) +vmovdqu %xmm3, 48(%rdx) +vpaddq %xmm11, %xmm5, %xmm11 +cmpq $64, %rcx +jbe chacha_blocks_avx2_mainloop3_finishup +addq $64, %rdx +subq $64, %rcx +jmp chacha_blocks_avx2_below256 +chacha_blocks_avx2_mainloop3_finishup: +cmpq $64, %rcx +je chacha_blocks_avx2_done +addq %rcx, %r9 +addq %rcx, %rdx +negq %rcx +chacha_blocks_avx2_copyoutput: +movb (%rdx, %rcx), %al +movb %al, (%r9, %rcx) +incq %rcx +jnz chacha_blocks_avx2_copyoutput +chacha_blocks_avx2_done: +vmovdqu %xmm11, 32(%rdi) +movq %rbp, %rsp +popq %r14 +popq %r13 +popq %r12 +popq %rbp +popq %rbx +vzeroupper +ret +FN_END chacha_blocks_avx2 + + +GLOBAL_HIDDEN_FN hchacha_avx2 +hchacha_avx2_local: +LOAD_VAR_PIC chacha_constants, %rax +vmovdqa 0(%rax), %xmm0 +vmovdqa 16(%rax), %xmm6 +vmovdqa 32(%rax), %xmm5 +vmovdqu 0(%rdi), %xmm1 +vmovdqu 16(%rdi), %xmm2 +vmovdqu 0(%rsi), %xmm3 +hhacha_mainloop_avx2: +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm5, 
%xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpshufd $0x93, %xmm0, %xmm0 +vpxor %xmm1, %xmm4, %xmm1 +vpshufd $0x4e, %xmm3, %xmm3 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpshufd $0x39, %xmm2, %xmm2 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm5, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x39, %xmm0, %xmm0 +vpslld $7, %xmm1, %xmm4 +vpshufd $0x4e, %xmm3, %xmm3 +vpsrld $25, %xmm1, %xmm1 +vpshufd $0x93, %xmm2, %xmm2 +vpxor %xmm1, %xmm4, %xmm1 +subl $2, %ecx +jne hhacha_mainloop_avx2 +vmovdqu %xmm0, (%rdx) +vmovdqu %xmm3, 16(%rdx) +ret +FN_END hchacha_avx2 + +GLOBAL_HIDDEN_FN_EXT chacha_avx2, 6, 16 +pushq %rbp +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +vmovdqu 0(%rdi), %xmm0 +vmovdqu 16(%rdi), %xmm1 +vmovdqa %xmm0, 0(%rsp) +vmovdqa %xmm1, 16(%rsp) +xorq %rdi, %rdi +movq %rdi, 32(%rsp) +movq 0(%rsi), %rsi +movq %rsi, 40(%rsp) +movq %r9, 48(%rsp) +movq %rsp, %rdi +movq %rdx, %rsi +movq %rcx, %rdx +movq %r8, %rcx +call chacha_blocks_avx2_local +vpxor %xmm0, %xmm0, %xmm0 +vmovdqa %xmm0, 0(%rsp) +vmovdqa %xmm0, 16(%rsp) +vmovdqa %xmm0, 32(%rsp) +movq %rbp, %rsp +popq %rbp +ret +FN_END chacha_avx2 + +GLOBAL_HIDDEN_FN_EXT xchacha_avx2, 6, 16 +pushq %rbp +pushq %rbx +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +movq %rsp, %rbx +xorq %rax, %rax +movq %rax, 32(%rbx) +movq 16(%rsi), %rax +movq %rax, 40(%rbx) +movq %r9, 48(%rbx) +pushq %rdx +pushq %rcx +pushq %r8 +movq %rbx, %rdx +movq %r9, %rcx +call hchacha_avx2_local +movq %rbx, %rdi +popq %rcx +popq %rdx +popq %rsi +call chacha_blocks_avx2_local +vpxor %xmm0, %xmm0, %xmm0 +vmovdqa %xmm0, 0(%rbx) +vmovdqa %xmm0, 16(%rbx) +vmovdqa %xmm0, 32(%rbx) +movq %rbp, %rsp +popq %rbx +popq %rbp +ret +FN_END xchacha_avx2 
diff --git a/src/libcryptobox/chacha20/chacha.c b/src/libcryptobox/chacha20/chacha.c new file mode 100644 index 0000000..0b471c8 --- /dev/null +++ b/src/libcryptobox/chacha20/chacha.c @@ -0,0 +1,262 @@ +/* Copyright (c) 2015, Vsevolod Stakhov + * Copyright (c) 2015, Andrew Moon + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "cryptobox.h" +#include "chacha.h" +#include "platform_config.h" + +extern unsigned cpu_config; + +typedef struct chacha_impl_t { + unsigned long cpu_flags; + const char *desc; + void (*chacha)(const chacha_key *key, const chacha_iv *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + void (*xchacha)(const chacha_key *key, const chacha_iv24 *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + void (*chacha_blocks)(chacha_state_internal *state, + const unsigned char *in, unsigned char *out, size_t bytes); + void (*hchacha)(const unsigned char key[32], const unsigned char iv[16], + unsigned char out[32], size_t rounds); +} chacha_impl_t; + +#define CHACHA_DECLARE(ext) \ + void chacha_##ext(const chacha_key *key, const chacha_iv *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); \ + void xchacha_##ext(const chacha_key *key, const chacha_iv24 *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); \ + void chacha_blocks_##ext(chacha_state_internal *state, const unsigned char *in, unsigned char *out, size_t bytes); \ + void hchacha_##ext(const unsigned char key[32], const unsigned char iv[16], unsigned char out[32], size_t rounds); +#define CHACHA_IMPL(cpuflags, desc, ext) \ + { \ + (cpuflags), desc, chacha_##ext, xchacha_##ext, chacha_blocks_##ext, hchacha_##ext \ + } + +#if defined(HAVE_AVX2) && defined(__x86_64__) +CHACHA_DECLARE(avx2) +#define CHACHA_AVX2 CHACHA_IMPL(CPUID_AVX2, "avx2", avx2) +#endif +#if defined(HAVE_AVX) && defined(__x86_64__) +CHACHA_DECLARE(avx) +#define CHACHA_AVX CHACHA_IMPL(CPUID_AVX, "avx", avx) +#endif +#if defined(HAVE_SSE2) && defined(__x86_64__) +CHACHA_DECLARE(sse2) +#define CHACHA_SSE2 CHACHA_IMPL(CPUID_SSE2, "sse2", sse2) +#endif + +CHACHA_DECLARE(ref) +#define CHACHA_GENERIC CHACHA_IMPL(0, "generic", ref) + +static const chacha_impl_t chacha_list[] = { + CHACHA_GENERIC, +#if 
defined(CHACHA_AVX2) && defined(__x86_64__) + CHACHA_AVX2, +#endif +#if defined(CHACHA_AVX) && defined(__x86_64__) + CHACHA_AVX, +#endif +#if defined(CHACHA_SSE2) && defined(__x86_64__) + CHACHA_SSE2 +#endif +}; + +static const chacha_impl_t *chacha_impl = &chacha_list[0]; + +static int +chacha_is_aligned(const void *p) +{ + return ((size_t) p & (sizeof(size_t) - 1)) == 0; +} + +const char * +chacha_load(void) +{ + guint i; + + if (cpu_config != 0) { + for (i = 0; i < G_N_ELEMENTS(chacha_list); i++) { + if (chacha_list[i].cpu_flags & cpu_config) { + chacha_impl = &chacha_list[i]; + break; + } + } + } + + return chacha_impl->desc; +} + +void chacha_init(chacha_state *S, const chacha_key *key, + const chacha_iv *iv, size_t rounds) +{ + chacha_state_internal *state = (chacha_state_internal *) S; + memcpy(state->s + 0, key, 32); + memset(state->s + 32, 0, 8); + memcpy(state->s + 40, iv, 8); + state->rounds = rounds; + state->leftover = 0; +} + +/* processes inlen bytes (can do partial blocks), handling input/output alignment */ +static void +chacha_consume(chacha_state_internal *state, + const unsigned char *in, unsigned char *out, size_t inlen) +{ + unsigned char buffer[16 * CHACHA_BLOCKBYTES]; + int in_aligned, out_aligned; + + /* it's ok to call with 0 bytes */ + if (!inlen) + return; + + /* if everything is aligned, handle directly */ + in_aligned = chacha_is_aligned(in); + out_aligned = chacha_is_aligned(out); + if (in_aligned && out_aligned) { + chacha_impl->chacha_blocks(state, in, out, inlen); + return; + } + + /* copy the unaligned data to an aligned buffer and process in chunks */ + while (inlen) { + const size_t bytes = (inlen > sizeof(buffer)) ? sizeof(buffer) : inlen; + const unsigned char *src = in; + unsigned char *dst = (out_aligned) ? 
out : buffer; + if (!in_aligned) { + memcpy(buffer, in, bytes); + src = buffer; + } + chacha_impl->chacha_blocks(state, src, dst, bytes); + if (!out_aligned) + memcpy(out, buffer, bytes); + if (in) + in += bytes; + out += bytes; + inlen -= bytes; + } +} + +/* hchacha */ +void hchacha(const unsigned char key[32], + const unsigned char iv[16], unsigned char out[32], size_t rounds) +{ + chacha_impl->hchacha(key, iv, out, rounds); +} + +/* update, returns number of bytes written to out */ +size_t +chacha_update(chacha_state *S, const unsigned char *in, unsigned char *out, + size_t inlen) +{ + chacha_state_internal *state = (chacha_state_internal *) S; + unsigned char *out_start = out; + size_t bytes; + + /* enough for at least one block? */ + while ((state->leftover + inlen) >= CHACHA_BLOCKBYTES) { + /* handle the previous data */ + if (state->leftover) { + bytes = (CHACHA_BLOCKBYTES - state->leftover); + if (in) { + memcpy(state->buffer + state->leftover, in, bytes); + in += bytes; + } + chacha_consume(state, (in) ? 
state->buffer : NULL, out, + CHACHA_BLOCKBYTES); + inlen -= bytes; + out += CHACHA_BLOCKBYTES; + state->leftover = 0; + } + + /* handle the direct data */ + bytes = (inlen & ~(CHACHA_BLOCKBYTES - 1)); + if (bytes) { + chacha_consume(state, in, out, bytes); + inlen -= bytes; + if (in) + in += bytes; + out += bytes; + } + } + + /* handle leftover data */ + if (inlen) { + if (in) + memcpy(state->buffer + state->leftover, in, inlen); + else + memset(state->buffer + state->leftover, 0, inlen); + state->leftover += inlen; + } + + return out - out_start; +} + +/* finalize, write out any leftover data */ +size_t +chacha_final(chacha_state *S, unsigned char *out) +{ + chacha_state_internal *state = (chacha_state_internal *) S; + size_t leftover = state->leftover; + if (leftover) { + if (chacha_is_aligned(out)) { + chacha_impl->chacha_blocks(state, state->buffer, out, leftover); + } + else { + chacha_impl->chacha_blocks(state, state->buffer, state->buffer, + leftover); + memcpy(out, state->buffer, leftover); + } + } + rspamd_explicit_memzero(S, sizeof(chacha_state)); + return leftover; +} + +/* one-shot, input/output assumed to be word aligned */ +void chacha(const chacha_key *key, const chacha_iv *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds) +{ + chacha_impl->chacha(key, iv, in, out, inlen, rounds); +} + +/* + xchacha, chacha with a 192 bit nonce + */ + +void xchacha_init(chacha_state *S, const chacha_key *key, + const chacha_iv24 *iv, size_t rounds) +{ + chacha_key subkey; + hchacha(key->b, iv->b, subkey.b, rounds); + chacha_init(S, &subkey, (chacha_iv *) (iv->b + 16), rounds); +} + +/* one-shot, input/output assumed to be word aligned */ +void xchacha(const chacha_key *key, const chacha_iv24 *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds) +{ + chacha_impl->xchacha(key, iv, in, out, inlen, rounds); +} diff --git a/src/libcryptobox/chacha20/chacha.h b/src/libcryptobox/chacha20/chacha.h new file mode 
100644 index 0000000..d05088a --- /dev/null +++ b/src/libcryptobox/chacha20/chacha.h @@ -0,0 +1,87 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Andrew Moon, Vsevolod Stakhov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + + +#ifndef CHACHA_H_ +#define CHACHA_H_ + + +#define CHACHA_BLOCKBYTES 64 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct chacha_state_internal_t { + unsigned char s[48]; + size_t rounds; + size_t leftover; + unsigned char buffer[CHACHA_BLOCKBYTES]; +} chacha_state_internal; + +typedef struct chacha_state_t { + unsigned char opaque[128]; +} chacha_state; + +typedef struct chacha_key_t { + unsigned char b[32]; +} chacha_key; + +typedef struct chacha_iv_t { + unsigned char b[8]; +} chacha_iv; + +typedef struct chacha_iv24_t { + unsigned char b[24]; +} chacha_iv24; + +void hchacha(const unsigned char key[32], const unsigned char iv[16], + unsigned char out[32], size_t rounds); + +void chacha_init(chacha_state *S, const chacha_key *key, const chacha_iv *iv, + size_t rounds); + +void xchacha_init(chacha_state *S, const chacha_key *key, + const chacha_iv24 *iv, size_t rounds); + +size_t chacha_update(chacha_state *S, const unsigned char *in, + unsigned char *out, size_t inlen); + +size_t chacha_final(chacha_state *S, unsigned char *out); + +void chacha(const chacha_key *key, const chacha_iv *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + +void xchacha(const chacha_key *key, const chacha_iv24 *iv, + const unsigned char *in, unsigned char *out, size_t inlen, + size_t rounds); + +const char *chacha_load(void); + +#ifdef __cplusplus +} +#endif + +#endif /* CHACHA_H_ */ diff --git a/src/libcryptobox/chacha20/constants.S b/src/libcryptobox/chacha20/constants.S new file mode 100644 index 0000000..ff109a3 --- /dev/null +++ b/src/libcryptobox/chacha20/constants.S @@ -0,0 +1,6 @@ +SECTION_RODATA +.p2align 4,,15 +chacha_constants: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 /* "expand 32-byte k" */ +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 /* pshufb rotate by 16 */ +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 /* pshufb rotate by 8 */ diff --git a/src/libcryptobox/chacha20/ref.c b/src/libcryptobox/chacha20/ref.c 
new file mode 100644 index 0000000..ee646db --- /dev/null +++ b/src/libcryptobox/chacha20/ref.c @@ -0,0 +1,272 @@ +#include "config.h" +#include "chacha.h" +#include "cryptobox.h" + +#if defined(HAVE_INT32) +typedef uint32_t chacha_int32; +#else +typedef guint32 chacha_int32; +#endif + +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ +static chacha_int32 +U8TO32(const unsigned char *p) +{ + return (((chacha_int32) (p[0])) | + ((chacha_int32) (p[1]) << 8) | + ((chacha_int32) (p[2]) << 16) | + ((chacha_int32) (p[3]) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ +static void +U32TO8(unsigned char *p, chacha_int32 v) +{ + p[0] = (v) &0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; +} + +/* 32 bit left rotate */ +static chacha_int32 +ROTL32(chacha_int32 x, int k) +{ + return ((x << k) | (x >> (32 - k))) & 0xffffffff; +} + +/* "expand 32-byte k", as 4 little endian 32-bit unsigned integers */ +static const chacha_int32 chacha_constants[4] = { + 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}; + +void chacha_blocks_ref(chacha_state_internal *state, const unsigned char *in, unsigned char *out, size_t bytes) +{ + chacha_int32 x[16], j[12]; + chacha_int32 t; + unsigned char *ctarget = out, tmp[64]; + size_t i, r; + + if (!bytes) return; + + j[0] = U8TO32(state->s + 0); + j[1] = U8TO32(state->s + 4); + j[2] = U8TO32(state->s + 8); + j[3] = U8TO32(state->s + 12); + j[4] = U8TO32(state->s + 16); + j[5] = U8TO32(state->s + 20); + j[6] = U8TO32(state->s + 24); + j[7] = U8TO32(state->s + 28); + j[8] = U8TO32(state->s + 32); + j[9] = U8TO32(state->s + 36); + j[10] = U8TO32(state->s + 40); + j[11] = U8TO32(state->s + 44); + + r = state->rounds; + + for (;;) { + if (bytes < 64) { + if (in) { + for (i = 0; i < bytes; i++) tmp[i] = in[i]; + in = tmp; + } + ctarget = out; + out = tmp; + } + + x[0] = chacha_constants[0]; + x[1] = chacha_constants[1]; + 
x[2] = chacha_constants[2]; + x[3] = chacha_constants[3]; + x[4] = j[0]; + x[5] = j[1]; + x[6] = j[2]; + x[7] = j[3]; + x[8] = j[4]; + x[9] = j[5]; + x[10] = j[6]; + x[11] = j[7]; + x[12] = j[8]; + x[13] = j[9]; + x[14] = j[10]; + x[15] = j[11]; + +#define quarter(a, b, c, d) \ + a += b; \ + t = d ^ a; \ + d = ROTL32(t, 16); \ + c += d; \ + t = b ^ c; \ + b = ROTL32(t, 12); \ + a += b; \ + t = d ^ a; \ + d = ROTL32(t, 8); \ + c += d; \ + t = b ^ c; \ + b = ROTL32(t, 7); + +#define doubleround() \ + quarter(x[0], x[4], x[8], x[12]) \ + quarter(x[1], x[5], x[9], x[13]) \ + quarter(x[2], x[6], x[10], x[14]) \ + quarter(x[3], x[7], x[11], x[15]) \ + quarter(x[0], x[5], x[10], x[15]) \ + quarter(x[1], x[6], x[11], x[12]) \ + quarter(x[2], x[7], x[8], x[13]) \ + quarter(x[3], x[4], x[9], x[14]) + + i = r; + do { + doubleround() + i -= 2; + } while (i); + + x[0] += chacha_constants[0]; + x[1] += chacha_constants[1]; + x[2] += chacha_constants[2]; + x[3] += chacha_constants[3]; + x[4] += j[0]; + x[5] += j[1]; + x[6] += j[2]; + x[7] += j[3]; + x[8] += j[4]; + x[9] += j[5]; + x[10] += j[6]; + x[11] += j[7]; + x[12] += j[8]; + x[13] += j[9]; + x[14] += j[10]; + x[15] += j[11]; + + if (in) { + U32TO8(out + 0, x[0] ^ U8TO32(in + 0)); + U32TO8(out + 4, x[1] ^ U8TO32(in + 4)); + U32TO8(out + 8, x[2] ^ U8TO32(in + 8)); + U32TO8(out + 12, x[3] ^ U8TO32(in + 12)); + U32TO8(out + 16, x[4] ^ U8TO32(in + 16)); + U32TO8(out + 20, x[5] ^ U8TO32(in + 20)); + U32TO8(out + 24, x[6] ^ U8TO32(in + 24)); + U32TO8(out + 28, x[7] ^ U8TO32(in + 28)); + U32TO8(out + 32, x[8] ^ U8TO32(in + 32)); + U32TO8(out + 36, x[9] ^ U8TO32(in + 36)); + U32TO8(out + 40, x[10] ^ U8TO32(in + 40)); + U32TO8(out + 44, x[11] ^ U8TO32(in + 44)); + U32TO8(out + 48, x[12] ^ U8TO32(in + 48)); + U32TO8(out + 52, x[13] ^ U8TO32(in + 52)); + U32TO8(out + 56, x[14] ^ U8TO32(in + 56)); + U32TO8(out + 60, x[15] ^ U8TO32(in + 60)); + in += 64; + } + else { + U32TO8(out + 0, x[0]); + U32TO8(out + 4, x[1]); + U32TO8(out + 8, 
x[2]); + U32TO8(out + 12, x[3]); + U32TO8(out + 16, x[4]); + U32TO8(out + 20, x[5]); + U32TO8(out + 24, x[6]); + U32TO8(out + 28, x[7]); + U32TO8(out + 32, x[8]); + U32TO8(out + 36, x[9]); + U32TO8(out + 40, x[10]); + U32TO8(out + 44, x[11]); + U32TO8(out + 48, x[12]); + U32TO8(out + 52, x[13]); + U32TO8(out + 56, x[14]); + U32TO8(out + 60, x[15]); + } + + /* increment the 64 bit counter, split in to two 32 bit halves */ + j[8]++; + if (!j[8]) + j[9]++; + + if (bytes <= 64) { + if (bytes < 64) + for (i = 0; i < bytes; i++) ctarget[i] = out[i]; + + /* store the counter back to the state */ + U32TO8(state->s + 32, j[8]); + U32TO8(state->s + 36, j[9]); + goto cleanup; + } + bytes -= 64; + out += 64; + } + +cleanup: + rspamd_explicit_memzero(j, sizeof(j)); +} + +void hchacha_ref(const unsigned char key[32], const unsigned char iv[16], unsigned char out[32], size_t rounds) +{ + chacha_int32 x[16]; + chacha_int32 t; + + x[0] = chacha_constants[0]; + x[1] = chacha_constants[1]; + x[2] = chacha_constants[2]; + x[3] = chacha_constants[3]; + x[4] = U8TO32(key + 0); + x[5] = U8TO32(key + 4); + x[6] = U8TO32(key + 8); + x[7] = U8TO32(key + 12); + x[8] = U8TO32(key + 16); + x[9] = U8TO32(key + 20); + x[10] = U8TO32(key + 24); + x[11] = U8TO32(key + 28); + x[12] = U8TO32(iv + 0); + x[13] = U8TO32(iv + 4); + x[14] = U8TO32(iv + 8); + x[15] = U8TO32(iv + 12); + + do { + doubleround() + rounds -= 2; + } while (rounds); + + /* indices for the chacha constant */ + U32TO8(out + 0, x[0]); + U32TO8(out + 4, x[1]); + U32TO8(out + 8, x[2]); + U32TO8(out + 12, x[3]); + + /* indices for the iv */ + U32TO8(out + 16, x[12]); + U32TO8(out + 20, x[13]); + U32TO8(out + 24, x[14]); + U32TO8(out + 28, x[15]); +} + +void chacha_clear_state_ref(chacha_state_internal *state) +{ + rspamd_explicit_memzero(state, 48); +} + +void chacha_ref(const chacha_key *key, const chacha_iv *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds) +{ + chacha_state_internal state; + size_t i; + 
for (i = 0; i < 32; i++) + state.s[i + 0] = key->b[i]; + for (i = 0; i < 8; i++) + state.s[i + 32] = 0; + for (i = 0; i < 8; i++) + state.s[i + 40] = iv->b[i]; + state.rounds = rounds; + chacha_blocks_ref(&state, in, out, inlen); + chacha_clear_state_ref(&state); +} + +void xchacha_ref(const chacha_key *key, const chacha_iv24 *iv, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds) +{ + chacha_state_internal state; + size_t i; + hchacha_ref(key->b, iv->b, &state.s[0], rounds); + for (i = 0; i < 8; i++) + state.s[i + 32] = 0; + for (i = 0; i < 8; i++) + state.s[i + 40] = iv->b[i + 16]; + state.rounds = rounds; + chacha_blocks_ref(&state, in, out, inlen); + chacha_clear_state_ref(&state); +} diff --git a/src/libcryptobox/chacha20/sse2.S b/src/libcryptobox/chacha20/sse2.S new file mode 100644 index 0000000..a91d095 --- /dev/null +++ b/src/libcryptobox/chacha20/sse2.S @@ -0,0 +1,734 @@ +#include "../macro.S" +#include "constants.S" +SECTION_TEXT + +GLOBAL_HIDDEN_FN chacha_blocks_sse2 +chacha_blocks_sse2_local: +pushq %rbx +pushq %rbp +movq %rsp, %rbp +andq $~63, %rsp +subq $512, %rsp +movq $0x3320646e61707865, %rax +movq $0x6b20657479622d32, %r8 +movd %rax, %xmm8 +movd %r8, %xmm14 +punpcklqdq %xmm14, %xmm8 +movdqu 0(%rdi), %xmm9 +movdqu 16(%rdi), %xmm10 +movdqu 32(%rdi), %xmm11 +movq 48(%rdi), %rax +movq $1, %r9 +movdqa %xmm8, 0(%rsp) +movdqa %xmm9, 16(%rsp) +movdqa %xmm10, 32(%rsp) +movdqa %xmm11, 48(%rsp) +movq %rax, 64(%rsp) +cmpq $256, %rcx +jb chacha_blocks_sse2_below256 +pshufd $0x00, %xmm8, %xmm0 +pshufd $0x55, %xmm8, %xmm1 +pshufd $0xaa, %xmm8, %xmm2 +pshufd $0xff, %xmm8, %xmm3 +movdqa %xmm0, 128(%rsp) +movdqa %xmm1, 144(%rsp) +movdqa %xmm2, 160(%rsp) +movdqa %xmm3, 176(%rsp) +pshufd $0x00, %xmm9, %xmm0 +pshufd $0x55, %xmm9, %xmm1 +pshufd $0xaa, %xmm9, %xmm2 +pshufd $0xff, %xmm9, %xmm3 +movdqa %xmm0, 192(%rsp) +movdqa %xmm1, 208(%rsp) +movdqa %xmm2, 224(%rsp) +movdqa %xmm3, 240(%rsp) +pshufd $0x00, %xmm10, %xmm0 +pshufd $0x55, %xmm10, 
%xmm1 +pshufd $0xaa, %xmm10, %xmm2 +pshufd $0xff, %xmm10, %xmm3 +movdqa %xmm0, 256(%rsp) +movdqa %xmm1, 272(%rsp) +movdqa %xmm2, 288(%rsp) +movdqa %xmm3, 304(%rsp) +pshufd $0xaa, %xmm11, %xmm0 +pshufd $0xff, %xmm11, %xmm1 +movdqa %xmm0, 352(%rsp) +movdqa %xmm1, 368(%rsp) +jmp chacha_blocks_sse2_atleast256 +.p2align 6,,63 +chacha_blocks_sse2_atleast256: +movq 48(%rsp), %rax +leaq 1(%rax), %r8 +leaq 2(%rax), %r9 +leaq 3(%rax), %r10 +leaq 4(%rax), %rbx +movl %eax, 320(%rsp) +movl %r8d, 4+320(%rsp) +movl %r9d, 8+320(%rsp) +movl %r10d, 12+320(%rsp) +shrq $32, %rax +shrq $32, %r8 +shrq $32, %r9 +shrq $32, %r10 +movl %eax, 336(%rsp) +movl %r8d, 4+336(%rsp) +movl %r9d, 8+336(%rsp) +movl %r10d, 12+336(%rsp) +movq %rbx, 48(%rsp) +movq 64(%rsp), %rax +movdqa 128(%rsp), %xmm0 +movdqa 144(%rsp), %xmm1 +movdqa 160(%rsp), %xmm2 +movdqa 176(%rsp), %xmm3 +movdqa 192(%rsp), %xmm4 +movdqa 208(%rsp), %xmm5 +movdqa 224(%rsp), %xmm6 +movdqa 240(%rsp), %xmm7 +movdqa 256(%rsp), %xmm8 +movdqa 272(%rsp), %xmm9 +movdqa 288(%rsp), %xmm10 +movdqa 304(%rsp), %xmm11 +movdqa 320(%rsp), %xmm12 +movdqa 336(%rsp), %xmm13 +movdqa 352(%rsp), %xmm14 +movdqa 368(%rsp), %xmm15 +chacha_blocks_sse2_mainloop1: +paddd %xmm4, %xmm0 +paddd %xmm5, %xmm1 +pxor %xmm0, %xmm12 +pxor %xmm1, %xmm13 +paddd %xmm6, %xmm2 +paddd %xmm7, %xmm3 +movdqa %xmm6, 96(%rsp) +pxor %xmm2, %xmm14 +pxor %xmm3, %xmm15 +pshuflw $0xb1,%xmm12,%xmm12 +pshufhw $0xb1,%xmm12,%xmm12 +pshuflw $0xb1,%xmm13,%xmm13 +pshufhw $0xb1,%xmm13,%xmm13 +pshuflw $0xb1,%xmm14,%xmm14 +pshufhw $0xb1,%xmm14,%xmm14 +pshuflw $0xb1,%xmm15,%xmm15 +pshufhw $0xb1,%xmm15,%xmm15 +paddd %xmm12, %xmm8 +paddd %xmm13, %xmm9 +paddd %xmm14, %xmm10 +paddd %xmm15, %xmm11 +movdqa %xmm12, 112(%rsp) +pxor %xmm8, %xmm4 +pxor %xmm9, %xmm5 +movdqa 96(%rsp), %xmm6 +movdqa %xmm4, %xmm12 +pslld $ 12, %xmm4 +psrld $20, %xmm12 +pxor %xmm12, %xmm4 +movdqa %xmm5, %xmm12 +pslld $ 12, %xmm5 +psrld $20, %xmm12 +pxor %xmm12, %xmm5 +pxor %xmm10, %xmm6 +pxor %xmm11, %xmm7 +movdqa %xmm6, %xmm12 
+pslld $ 12, %xmm6 +psrld $20, %xmm12 +pxor %xmm12, %xmm6 +movdqa %xmm7, %xmm12 +pslld $ 12, %xmm7 +psrld $20, %xmm12 +pxor %xmm12, %xmm7 +movdqa 112(%rsp), %xmm12 +paddd %xmm4, %xmm0 +paddd %xmm5, %xmm1 +pxor %xmm0, %xmm12 +pxor %xmm1, %xmm13 +paddd %xmm6, %xmm2 +paddd %xmm7, %xmm3 +movdqa %xmm6, 96(%rsp) +pxor %xmm2, %xmm14 +pxor %xmm3, %xmm15 +movdqa %xmm12, %xmm6 +pslld $ 8, %xmm12 +psrld $24, %xmm6 +pxor %xmm6, %xmm12 +movdqa %xmm13, %xmm6 +pslld $ 8, %xmm13 +psrld $24, %xmm6 +pxor %xmm6, %xmm13 +paddd %xmm12, %xmm8 +paddd %xmm13, %xmm9 +movdqa %xmm14, %xmm6 +pslld $ 8, %xmm14 +psrld $24, %xmm6 +pxor %xmm6, %xmm14 +movdqa %xmm15, %xmm6 +pslld $ 8, %xmm15 +psrld $24, %xmm6 +pxor %xmm6, %xmm15 +paddd %xmm14, %xmm10 +paddd %xmm15, %xmm11 +movdqa %xmm12, 112(%rsp) +pxor %xmm8, %xmm4 +pxor %xmm9, %xmm5 +movdqa 96(%rsp), %xmm6 +movdqa %xmm4, %xmm12 +pslld $ 7, %xmm4 +psrld $25, %xmm12 +pxor %xmm12, %xmm4 +movdqa %xmm5, %xmm12 +pslld $ 7, %xmm5 +psrld $25, %xmm12 +pxor %xmm12, %xmm5 +pxor %xmm10, %xmm6 +pxor %xmm11, %xmm7 +movdqa %xmm6, %xmm12 +pslld $ 7, %xmm6 +psrld $25, %xmm12 +pxor %xmm12, %xmm6 +movdqa %xmm7, %xmm12 +pslld $ 7, %xmm7 +psrld $25, %xmm12 +pxor %xmm12, %xmm7 +movdqa 112(%rsp), %xmm12 +paddd %xmm5, %xmm0 +paddd %xmm6, %xmm1 +pxor %xmm0, %xmm15 +pxor %xmm1, %xmm12 +paddd %xmm7, %xmm2 +paddd %xmm4, %xmm3 +movdqa %xmm7, 96(%rsp) +pxor %xmm2, %xmm13 +pxor %xmm3, %xmm14 +pshuflw $0xb1,%xmm15,%xmm15 +pshufhw $0xb1,%xmm15,%xmm15 +pshuflw $0xb1,%xmm12,%xmm12 +pshufhw $0xb1,%xmm12,%xmm12 +pshuflw $0xb1,%xmm13,%xmm13 +pshufhw $0xb1,%xmm13,%xmm13 +pshuflw $0xb1,%xmm14,%xmm14 +pshufhw $0xb1,%xmm14,%xmm14 +paddd %xmm15, %xmm10 +paddd %xmm12, %xmm11 +paddd %xmm13, %xmm8 +paddd %xmm14, %xmm9 +movdqa %xmm15, 112(%rsp) +pxor %xmm10, %xmm5 +pxor %xmm11, %xmm6 +movdqa 96(%rsp), %xmm7 +movdqa %xmm5, %xmm15 +pslld $ 12, %xmm5 +psrld $20, %xmm15 +pxor %xmm15, %xmm5 +movdqa %xmm6, %xmm15 +pslld $ 12, %xmm6 +psrld $20, %xmm15 +pxor %xmm15, %xmm6 +pxor %xmm8, %xmm7 +pxor 
%xmm9, %xmm4 +movdqa %xmm7, %xmm15 +pslld $ 12, %xmm7 +psrld $20, %xmm15 +pxor %xmm15, %xmm7 +movdqa %xmm4, %xmm15 +pslld $ 12, %xmm4 +psrld $20, %xmm15 +pxor %xmm15, %xmm4 +movdqa 112(%rsp), %xmm15 +paddd %xmm5, %xmm0 +paddd %xmm6, %xmm1 +pxor %xmm0, %xmm15 +pxor %xmm1, %xmm12 +paddd %xmm7, %xmm2 +paddd %xmm4, %xmm3 +movdqa %xmm7, 96(%rsp) +pxor %xmm2, %xmm13 +pxor %xmm3, %xmm14 +movdqa %xmm15, %xmm7 +pslld $ 8, %xmm15 +psrld $24, %xmm7 +pxor %xmm7, %xmm15 +movdqa %xmm12, %xmm7 +pslld $ 8, %xmm12 +psrld $24, %xmm7 +pxor %xmm7, %xmm12 +paddd %xmm15, %xmm10 +paddd %xmm12, %xmm11 +movdqa %xmm13, %xmm7 +pslld $ 8, %xmm13 +psrld $24, %xmm7 +pxor %xmm7, %xmm13 +movdqa %xmm14, %xmm7 +pslld $ 8, %xmm14 +psrld $24, %xmm7 +pxor %xmm7, %xmm14 +paddd %xmm13, %xmm8 +paddd %xmm14, %xmm9 +movdqa %xmm15, 112(%rsp) +pxor %xmm10, %xmm5 +pxor %xmm11, %xmm6 +movdqa 96(%rsp), %xmm7 +movdqa %xmm5, %xmm15 +pslld $ 7, %xmm5 +psrld $25, %xmm15 +pxor %xmm15, %xmm5 +movdqa %xmm6, %xmm15 +pslld $ 7, %xmm6 +psrld $25, %xmm15 +pxor %xmm15, %xmm6 +pxor %xmm8, %xmm7 +pxor %xmm9, %xmm4 +movdqa %xmm7, %xmm15 +pslld $ 7, %xmm7 +psrld $25, %xmm15 +pxor %xmm15, %xmm7 +movdqa %xmm4, %xmm15 +pslld $ 7, %xmm4 +psrld $25, %xmm15 +pxor %xmm15, %xmm4 +movdqa 112(%rsp), %xmm15 +subq $2, %rax +jnz chacha_blocks_sse2_mainloop1 +paddd 128(%rsp), %xmm0 +paddd 144(%rsp), %xmm1 +paddd 160(%rsp), %xmm2 +paddd 176(%rsp), %xmm3 +paddd 192(%rsp), %xmm4 +paddd 208(%rsp), %xmm5 +paddd 224(%rsp), %xmm6 +paddd 240(%rsp), %xmm7 +paddd 256(%rsp), %xmm8 +paddd 272(%rsp), %xmm9 +paddd 288(%rsp), %xmm10 +paddd 304(%rsp), %xmm11 +paddd 320(%rsp), %xmm12 +paddd 336(%rsp), %xmm13 +paddd 352(%rsp), %xmm14 +paddd 368(%rsp), %xmm15 +movdqa %xmm8, 384(%rsp) +movdqa %xmm9, 400(%rsp) +movdqa %xmm10, 416(%rsp) +movdqa %xmm11, 432(%rsp) +movdqa %xmm12, 448(%rsp) +movdqa %xmm13, 464(%rsp) +movdqa %xmm14, 480(%rsp) +movdqa %xmm15, 496(%rsp) +movdqa %xmm0, %xmm8 +movdqa %xmm2, %xmm9 +movdqa %xmm4, %xmm10 +movdqa %xmm6, %xmm11 +punpckhdq 
%xmm1, %xmm0 +punpckhdq %xmm3, %xmm2 +punpckhdq %xmm5, %xmm4 +punpckhdq %xmm7, %xmm6 +punpckldq %xmm1, %xmm8 +punpckldq %xmm3, %xmm9 +punpckldq %xmm5, %xmm10 +punpckldq %xmm7, %xmm11 +movdqa %xmm0, %xmm1 +movdqa %xmm4, %xmm3 +movdqa %xmm8, %xmm5 +movdqa %xmm10, %xmm7 +punpckhqdq %xmm2, %xmm0 +punpckhqdq %xmm6, %xmm4 +punpckhqdq %xmm9, %xmm8 +punpckhqdq %xmm11, %xmm10 +punpcklqdq %xmm2, %xmm1 +punpcklqdq %xmm6, %xmm3 +punpcklqdq %xmm9, %xmm5 +punpcklqdq %xmm11, %xmm7 +andq %rsi, %rsi +jz chacha_blocks_sse2_noinput1 +movdqu 0(%rsi), %xmm2 +movdqu 16(%rsi), %xmm6 +movdqu 64(%rsi), %xmm9 +movdqu 80(%rsi), %xmm11 +movdqu 128(%rsi), %xmm12 +movdqu 144(%rsi), %xmm13 +movdqu 192(%rsi), %xmm14 +movdqu 208(%rsi), %xmm15 +pxor %xmm2, %xmm5 +pxor %xmm6, %xmm7 +pxor %xmm9, %xmm8 +pxor %xmm11, %xmm10 +pxor %xmm12, %xmm1 +pxor %xmm13, %xmm3 +pxor %xmm14, %xmm0 +pxor %xmm15, %xmm4 +movdqu %xmm5, 0(%rdx) +movdqu %xmm7, 16(%rdx) +movdqu %xmm8, 64(%rdx) +movdqu %xmm10, 80(%rdx) +movdqu %xmm1, 128(%rdx) +movdqu %xmm3, 144(%rdx) +movdqu %xmm0, 192(%rdx) +movdqu %xmm4, 208(%rdx) +movdqa 384(%rsp), %xmm0 +movdqa 400(%rsp), %xmm1 +movdqa 416(%rsp), %xmm2 +movdqa 432(%rsp), %xmm3 +movdqa 448(%rsp), %xmm4 +movdqa 464(%rsp), %xmm5 +movdqa 480(%rsp), %xmm6 +movdqa 496(%rsp), %xmm7 +movdqa %xmm0, %xmm8 +movdqa %xmm2, %xmm9 +movdqa %xmm4, %xmm10 +movdqa %xmm6, %xmm11 +punpckldq %xmm1, %xmm8 +punpckldq %xmm3, %xmm9 +punpckhdq %xmm1, %xmm0 +punpckhdq %xmm3, %xmm2 +punpckldq %xmm5, %xmm10 +punpckldq %xmm7, %xmm11 +punpckhdq %xmm5, %xmm4 +punpckhdq %xmm7, %xmm6 +movdqa %xmm8, %xmm1 +movdqa %xmm0, %xmm3 +movdqa %xmm10, %xmm5 +movdqa %xmm4, %xmm7 +punpcklqdq %xmm9, %xmm1 +punpcklqdq %xmm11, %xmm5 +punpckhqdq %xmm9, %xmm8 +punpckhqdq %xmm11, %xmm10 +punpcklqdq %xmm2, %xmm3 +punpcklqdq %xmm6, %xmm7 +punpckhqdq %xmm2, %xmm0 +punpckhqdq %xmm6, %xmm4 +movdqu 32(%rsi), %xmm2 +movdqu 48(%rsi), %xmm6 +movdqu 96(%rsi), %xmm9 +movdqu 112(%rsi), %xmm11 +movdqu 160(%rsi), %xmm12 +movdqu 176(%rsi), %xmm13 +movdqu 
224(%rsi), %xmm14 +movdqu 240(%rsi), %xmm15 +pxor %xmm2, %xmm1 +pxor %xmm6, %xmm5 +pxor %xmm9, %xmm8 +pxor %xmm11, %xmm10 +pxor %xmm12, %xmm3 +pxor %xmm13, %xmm7 +pxor %xmm14, %xmm0 +pxor %xmm15, %xmm4 +movdqu %xmm1, 32(%rdx) +movdqu %xmm5, 48(%rdx) +movdqu %xmm8, 96(%rdx) +movdqu %xmm10, 112(%rdx) +movdqu %xmm3, 160(%rdx) +movdqu %xmm7, 176(%rdx) +movdqu %xmm0, 224(%rdx) +movdqu %xmm4, 240(%rdx) +addq $256, %rsi +jmp chacha_blocks_sse2_mainloop_cont +chacha_blocks_sse2_noinput1: +movdqu %xmm5, 0(%rdx) +movdqu %xmm7, 16(%rdx) +movdqu %xmm8, 64(%rdx) +movdqu %xmm10, 80(%rdx) +movdqu %xmm1, 128(%rdx) +movdqu %xmm3, 144(%rdx) +movdqu %xmm0, 192(%rdx) +movdqu %xmm4, 208(%rdx) +movdqa 384(%rsp), %xmm0 +movdqa 400(%rsp), %xmm1 +movdqa 416(%rsp), %xmm2 +movdqa 432(%rsp), %xmm3 +movdqa 448(%rsp), %xmm4 +movdqa 464(%rsp), %xmm5 +movdqa 480(%rsp), %xmm6 +movdqa 496(%rsp), %xmm7 +movdqa %xmm0, %xmm8 +movdqa %xmm2, %xmm9 +movdqa %xmm4, %xmm10 +movdqa %xmm6, %xmm11 +punpckldq %xmm1, %xmm8 +punpckldq %xmm3, %xmm9 +punpckhdq %xmm1, %xmm0 +punpckhdq %xmm3, %xmm2 +punpckldq %xmm5, %xmm10 +punpckldq %xmm7, %xmm11 +punpckhdq %xmm5, %xmm4 +punpckhdq %xmm7, %xmm6 +movdqa %xmm8, %xmm1 +movdqa %xmm0, %xmm3 +movdqa %xmm10, %xmm5 +movdqa %xmm4, %xmm7 +punpcklqdq %xmm9, %xmm1 +punpcklqdq %xmm11, %xmm5 +punpckhqdq %xmm9, %xmm8 +punpckhqdq %xmm11, %xmm10 +punpcklqdq %xmm2, %xmm3 +punpcklqdq %xmm6, %xmm7 +punpckhqdq %xmm2, %xmm0 +punpckhqdq %xmm6, %xmm4 +movdqu %xmm1, 32(%rdx) +movdqu %xmm5, 48(%rdx) +movdqu %xmm8, 96(%rdx) +movdqu %xmm10, 112(%rdx) +movdqu %xmm3, 160(%rdx) +movdqu %xmm7, 176(%rdx) +movdqu %xmm0, 224(%rdx) +movdqu %xmm4, 240(%rdx) +chacha_blocks_sse2_mainloop_cont: +addq $256, %rdx +subq $256, %rcx +cmp $256, %rcx +jae chacha_blocks_sse2_atleast256 +movdqa 0(%rsp), %xmm8 +movdqa 16(%rsp), %xmm9 +movdqa 32(%rsp), %xmm10 +movdqa 48(%rsp), %xmm11 +movq $1, %r9 +chacha_blocks_sse2_below256: +movq %r9, %xmm5 +andq %rcx, %rcx +jz chacha_blocks_sse2_done +cmpq $64, %rcx +jae 
chacha_blocks_sse2_above63 +movq %rdx, %r9 +andq %rsi, %rsi +jz chacha_blocks_sse2_noinput2 +movq %rcx, %r10 +movq %rsp, %rdx +addq %r10, %rsi +addq %r10, %rdx +negq %r10 +chacha_blocks_sse2_copyinput: +movb (%rsi, %r10), %al +movb %al, (%rdx, %r10) +incq %r10 +jnz chacha_blocks_sse2_copyinput +movq %rsp, %rsi +chacha_blocks_sse2_noinput2: +movq %rsp, %rdx +chacha_blocks_sse2_above63: +movdqa %xmm8, %xmm0 +movdqa %xmm9, %xmm1 +movdqa %xmm10, %xmm2 +movdqa %xmm11, %xmm3 +movq 64(%rsp), %rax +chacha_blocks_sse2_mainloop2: +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +pshuflw $0xb1,%xmm3,%xmm3 +pshufhw $0xb1,%xmm3,%xmm3 +paddd %xmm3, %xmm2 +pxor %xmm2, %xmm1 +movdqa %xmm1,%xmm4 +pslld $12, %xmm1 +psrld $20, %xmm4 +pxor %xmm4, %xmm1 +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +movdqa %xmm3,%xmm4 +pslld $8, %xmm3 +psrld $24, %xmm4 +pshufd $0x93,%xmm0,%xmm0 +pxor %xmm4, %xmm3 +paddd %xmm3, %xmm2 +pshufd $0x4e,%xmm3,%xmm3 +pxor %xmm2, %xmm1 +pshufd $0x39,%xmm2,%xmm2 +movdqa %xmm1,%xmm4 +pslld $7, %xmm1 +psrld $25, %xmm4 +pxor %xmm4, %xmm1 +subq $2, %rax +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +pshuflw $0xb1,%xmm3,%xmm3 +pshufhw $0xb1,%xmm3,%xmm3 +paddd %xmm3, %xmm2 +pxor %xmm2, %xmm1 +movdqa %xmm1,%xmm4 +pslld $12, %xmm1 +psrld $20, %xmm4 +pxor %xmm4, %xmm1 +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +movdqa %xmm3,%xmm4 +pslld $8, %xmm3 +psrld $24, %xmm4 +pshufd $0x39,%xmm0,%xmm0 +pxor %xmm4, %xmm3 +paddd %xmm3, %xmm2 +pshufd $0x4e,%xmm3,%xmm3 +pxor %xmm2, %xmm1 +pshufd $0x93,%xmm2,%xmm2 +movdqa %xmm1,%xmm4 +pslld $7, %xmm1 +psrld $25, %xmm4 +pxor %xmm4, %xmm1 +jnz chacha_blocks_sse2_mainloop2 +paddd %xmm8, %xmm0 +paddd %xmm9, %xmm1 +paddd %xmm10, %xmm2 +paddd %xmm11, %xmm3 +andq %rsi, %rsi +jz chacha_blocks_sse2_noinput3 +movdqu 0(%rsi), %xmm12 +movdqu 16(%rsi), %xmm13 +movdqu 32(%rsi), %xmm14 +movdqu 48(%rsi), %xmm15 +pxor %xmm12, %xmm0 +pxor %xmm13, %xmm1 +pxor %xmm14, %xmm2 +pxor %xmm15, %xmm3 +addq $64, %rsi +chacha_blocks_sse2_noinput3: +movdqu %xmm0, 0(%rdx) +movdqu %xmm1, 16(%rdx) 
+movdqu %xmm2, 32(%rdx) +movdqu %xmm3, 48(%rdx) +paddq %xmm5, %xmm11 +cmpq $64, %rcx +jbe chacha_blocks_sse2_mainloop2_finishup +addq $64, %rdx +subq $64, %rcx +jmp chacha_blocks_sse2_below256 +chacha_blocks_sse2_mainloop2_finishup: +cmpq $64, %rcx +je chacha_blocks_sse2_done +addq %rcx, %r9 +addq %rcx, %rdx +negq %rcx +chacha_blocks_sse2_copyoutput: +movb (%rdx, %rcx), %al +movb %al, (%r9, %rcx) +incq %rcx +jnz chacha_blocks_sse2_copyoutput +chacha_blocks_sse2_done: +movdqu %xmm11, 32(%rdi) +movq %rbp, %rsp +popq %rbp +popq %rbx +ret +FN_END chacha_blocks_sse2 + +GLOBAL_HIDDEN_FN hchacha_sse2 +hchacha_sse2_local: +movq $0x3320646e61707865, %rax +movq $0x6b20657479622d32, %r8 +movd %rax, %xmm0 +movd %r8, %xmm4 +punpcklqdq %xmm4, %xmm0 +movdqu 0(%rdi), %xmm1 +movdqu 16(%rdi), %xmm2 +movdqu 0(%rsi), %xmm3 +hchacha_sse2_mainloop: +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +pshuflw $0xb1,%xmm3,%xmm3 +pshufhw $0xb1,%xmm3,%xmm3 +paddd %xmm3, %xmm2 +pxor %xmm2, %xmm1 +movdqa %xmm1,%xmm4 +pslld $12, %xmm1 +psrld $20, %xmm4 +pxor %xmm4, %xmm1 +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +movdqa %xmm3,%xmm4 +pslld $8, %xmm3 +psrld $24, %xmm4 +pshufd $0x93,%xmm0,%xmm0 +pxor %xmm4, %xmm3 +paddd %xmm3, %xmm2 +pshufd $0x4e,%xmm3,%xmm3 +pxor %xmm2, %xmm1 +pshufd $0x39,%xmm2,%xmm2 +movdqa %xmm1,%xmm4 +pslld $7, %xmm1 +psrld $25, %xmm4 +pxor %xmm4, %xmm1 +subq $2, %rcx +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +pshuflw $0xb1,%xmm3,%xmm3 +pshufhw $0xb1,%xmm3,%xmm3 +paddd %xmm3, %xmm2 +pxor %xmm2, %xmm1 +movdqa %xmm1,%xmm4 +pslld $12, %xmm1 +psrld $20, %xmm4 +pxor %xmm4, %xmm1 +paddd %xmm1, %xmm0 +pxor %xmm0, %xmm3 +movdqa %xmm3,%xmm4 +pslld $8, %xmm3 +psrld $24, %xmm4 +pshufd $0x39,%xmm0,%xmm0 +pxor %xmm4, %xmm3 +paddd %xmm3, %xmm2 +pshufd $0x4e,%xmm3,%xmm3 +pxor %xmm2, %xmm1 +pshufd $0x93,%xmm2,%xmm2 +movdqa %xmm1,%xmm4 +pslld $7, %xmm1 +psrld $25, %xmm4 +pxor %xmm4, %xmm1 +ja hchacha_sse2_mainloop +movdqu %xmm0, 0(%rdx) +movdqu %xmm3, 16(%rdx) +ret +FN_END hchacha_sse2 + +GLOBAL_HIDDEN_FN_EXT 
chacha_sse2, 6, 16 +pushq %rbp +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +movdqu 0(%rdi), %xmm0 +movdqu 16(%rdi), %xmm1 +movdqa %xmm0, 0(%rsp) +movdqa %xmm1, 16(%rsp) +xorq %rdi, %rdi +movq %rdi, 32(%rsp) +movq 0(%rsi), %rsi +movq %rsi, 40(%rsp) +movq %r9, 48(%rsp) +movq %rsp, %rdi +movq %rdx, %rsi +movq %rcx, %rdx +movq %r8, %rcx +call chacha_blocks_sse2_local +pxor %xmm0, %xmm0 +movdqa %xmm0, 0(%rsp) +movdqa %xmm0, 16(%rsp) +movdqa %xmm0, 32(%rsp) +movq %rbp, %rsp +popq %rbp +ret +FN_END chacha_sse2 + +GLOBAL_HIDDEN_FN_EXT xchacha_sse2, 6, 16 +pushq %rbp +pushq %rbx +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +movq %rsp, %rbx +xorq %rax, %rax +movq %rax, 32(%rbx) +movq 16(%rsi), %rax +movq %rax, 40(%rbx) +movq %r9, 48(%rbx) +pushq %rdx +pushq %rcx +pushq %r8 +movq %rbx, %rdx +movq %r9, %rcx +call hchacha_sse2_local +movq %rbx, %rdi +popq %rcx +popq %rdx +popq %rsi +call chacha_blocks_sse2_local +pxor %xmm0, %xmm0 +movdqa %xmm0, 0(%rbx) +movdqa %xmm0, 16(%rbx) +movdqa %xmm0, 32(%rbx) +movq %rbp, %rsp +popq %rbx +popq %rbp +ret +FN_END xchacha_sse2 diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c new file mode 100644 index 0000000..e118c4a --- /dev/null +++ b/src/libcryptobox/cryptobox.c @@ -0,0 +1,1778 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* Workaround for memset_s */ +#ifdef __APPLE__ +#define __STDC_WANT_LIB_EXT1__ 1 +#include <string.h> +#endif + +#include "config.h" +#include "cryptobox.h" +#include "platform_config.h" +#include "chacha20/chacha.h" +#include "catena/catena.h" +#include "base64/base64.h" +#include "ottery.h" +#include "printf.h" +#define XXH_INLINE_ALL +#define XXH_PRIVATE_API +#include "xxhash.h" +#define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */ +#include "../../contrib/mumhash/mum.h" +#include "../../contrib/t1ha/t1ha.h" +#ifdef HAVE_CPUID_H +#include <cpuid.h> +#endif +#ifdef HAVE_OPENSSL +#include <openssl/opensslv.h> +/* Openssl >= 1.0.1d is required for GCM verification */ +#if OPENSSL_VERSION_NUMBER >= 0x1000104fL +#define HAVE_USABLE_OPENSSL 1 +#endif +#endif + +#ifdef HAVE_USABLE_OPENSSL +#include <openssl/evp.h> +#include <openssl/ec.h> +#include <openssl/ecdh.h> +#include <openssl/ecdsa.h> +#include <openssl/rand.h> +#define CRYPTOBOX_CURVE_NID NID_X9_62_prime256v1 +#endif + +#include <signal.h> +#include <setjmp.h> +#include <stdalign.h> + +#include <sodium.h> + +unsigned cpu_config = 0; + +static gboolean cryptobox_loaded = FALSE; + +static const guchar n0[16] = {0}; + +#define CRYPTOBOX_ALIGNMENT 16 +#define cryptobox_align_ptr(p, a) \ + (void *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1)) + +static void +rspamd_cryptobox_cpuid(gint cpu[4], gint info) +{ + guint32 __attribute__((unused)) eax, __attribute__((unused)) ecx = 0, __attribute__((unused)) ebx = 0, __attribute__((unused)) edx = 0; + + eax = info; +#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) +#if defined(__i386__) && defined(__PIC__) + + /* in case of PIC under 32-bit EBX cannot be clobbered */ + + __asm__ volatile("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" + : "=D"(ebx), + "+a"(eax), "+c"(ecx), "=d"(edx)); +#else + __asm__ volatile("cpuid" + : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#endif + + cpu[0] = eax; + cpu[1] = 
ebx; + cpu[2] = ecx; + cpu[3] = edx; +#else + memset(cpu, 0, sizeof(gint) * 4); +#endif +} + +static sig_atomic_t ok = 0; +static jmp_buf j; + +__attribute__((noreturn)) static void +rspamd_cryptobox_ill_handler(int signo) +{ + ok = 0; + longjmp(j, -1); +} + +static gboolean +rspamd_cryptobox_test_instr(gint instr) +{ + void (*old_handler)(int); + guint32 rd; + +#if defined(__GNUC__) + ok = 1; + old_handler = signal(SIGILL, rspamd_cryptobox_ill_handler); + + if (setjmp(j) != 0) { + signal(SIGILL, old_handler); + + return FALSE; + } + + switch (instr) { +#if defined HAVE_SSE2 && defined(__x86_64__) + case CPUID_SSE2: + __asm__ volatile("psubb %xmm0, %xmm0"); + break; + case CPUID_RDRAND: + /* Use byte code here for compatibility */ + __asm__ volatile(".byte 0x0f,0xc7,0xf0; setc %1" + : "=a"(rd), "=qm"(ok) + : + : "edx"); + break; +#endif +#ifdef HAVE_SSE3 + case CPUID_SSE3: + __asm__ volatile("movshdup %xmm0, %xmm0"); + break; +#endif +#ifdef HAVE_SSSE3 + case CPUID_SSSE3: + __asm__ volatile("pshufb %xmm0, %xmm0"); + break; +#endif +#ifdef HAVE_SSE41 + case CPUID_SSE41: + __asm__ volatile("pcmpeqq %xmm0, %xmm0"); + break; +#endif +#if defined HAVE_SSE42 && defined(__x86_64__) + case CPUID_SSE42: + __asm__ volatile("pushq %rax\n" + "xorq %rax, %rax\n" + "crc32 %rax, %rax\n" + "popq %rax"); + break; +#endif +#ifdef HAVE_AVX + case CPUID_AVX: + __asm__ volatile("vpaddq %xmm0, %xmm0, %xmm0"); + break; +#endif +#ifdef HAVE_AVX2 + case CPUID_AVX2: + __asm__ volatile("vpaddq %ymm0, %ymm0, %ymm0"); + break; +#endif + default: + return FALSE; + break; + } + + signal(SIGILL, old_handler); +#endif + + (void) rd; /* Silence warning */ + + /* We actually never return here if SIGILL has been caught */ + return ok == 1; +} + +struct rspamd_cryptobox_library_ctx * +rspamd_cryptobox_init(void) +{ + gint cpu[4], nid; + const guint32 osxsave_mask = (1 << 27); + const guint32 fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); + const guint32 avx2_bmi12_mask = (1 << 5) | (1 
<< 3) | (1 << 8); + gulong bit; + static struct rspamd_cryptobox_library_ctx *ctx; + GString *buf; + + if (cryptobox_loaded) { + /* Ignore reload attempts */ + return ctx; + } + + cryptobox_loaded = TRUE; + ctx = g_malloc0(sizeof(*ctx)); + + rspamd_cryptobox_cpuid(cpu, 0); + nid = cpu[0]; + rspamd_cryptobox_cpuid(cpu, 1); + + if (nid > 1) { + if ((cpu[3] & ((guint32) 1 << 26))) { + if (rspamd_cryptobox_test_instr(CPUID_SSE2)) { + cpu_config |= CPUID_SSE2; + } + } + if ((cpu[2] & ((guint32) 1 << 0))) { + if (rspamd_cryptobox_test_instr(CPUID_SSE3)) { + cpu_config |= CPUID_SSE3; + } + } + if ((cpu[2] & ((guint32) 1 << 9))) { + if (rspamd_cryptobox_test_instr(CPUID_SSSE3)) { + cpu_config |= CPUID_SSSE3; + } + } + if ((cpu[2] & ((guint32) 1 << 19))) { + if (rspamd_cryptobox_test_instr(CPUID_SSE41)) { + cpu_config |= CPUID_SSE41; + } + } + if ((cpu[2] & ((guint32) 1 << 20))) { + if (rspamd_cryptobox_test_instr(CPUID_SSE42)) { + cpu_config |= CPUID_SSE42; + } + } + if ((cpu[2] & ((guint32) 1 << 30))) { + if (rspamd_cryptobox_test_instr(CPUID_RDRAND)) { + cpu_config |= CPUID_RDRAND; + } + } + + /* OSXSAVE */ + if ((cpu[2] & osxsave_mask) == osxsave_mask) { + if ((cpu[2] & ((guint32) 1 << 28))) { + if (rspamd_cryptobox_test_instr(CPUID_AVX)) { + cpu_config |= CPUID_AVX; + } + } + + if (nid >= 7 && + (cpu[2] & fma_movbe_osxsave_mask) == fma_movbe_osxsave_mask) { + rspamd_cryptobox_cpuid(cpu, 7); + + if ((cpu[1] & avx2_bmi12_mask) == avx2_bmi12_mask) { + if (rspamd_cryptobox_test_instr(CPUID_AVX2)) { + cpu_config |= CPUID_AVX2; + } + } + } + } + } + + buf = g_string_new(""); + + for (bit = 0x1; bit != 0; bit <<= 1) { + if (cpu_config & bit) { + switch (bit) { + case CPUID_SSE2: + rspamd_printf_gstring(buf, "sse2, "); + break; + case CPUID_SSE3: + rspamd_printf_gstring(buf, "sse3, "); + break; + case CPUID_SSSE3: + rspamd_printf_gstring(buf, "ssse3, "); + break; + case CPUID_SSE41: + rspamd_printf_gstring(buf, "sse4.1, "); + break; + case CPUID_SSE42: + 
rspamd_printf_gstring(buf, "sse4.2, "); + break; + case CPUID_AVX: + rspamd_printf_gstring(buf, "avx, "); + break; + case CPUID_AVX2: + rspamd_printf_gstring(buf, "avx2, "); + break; + case CPUID_RDRAND: + rspamd_printf_gstring(buf, "rdrand, "); + break; + default: + break; /* Silence warning */ + } + } + } + + if (buf->len > 2) { + /* Trim last chars */ + g_string_erase(buf, buf->len - 2, 2); + } + + ctx->cpu_extensions = buf->str; + g_string_free(buf, FALSE); + ctx->cpu_config = cpu_config; + g_assert(sodium_init() != -1); + + ctx->chacha20_impl = chacha_load(); + ctx->base64_impl = base64_load(); +#if defined(HAVE_USABLE_OPENSSL) && (OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER)) + /* Needed for old openssl api, not sure about LibreSSL */ + ERR_load_EC_strings(); + ERR_load_RAND_strings(); + ERR_load_EVP_strings(); +#endif + + return ctx; +} + +void rspamd_cryptobox_deinit(struct rspamd_cryptobox_library_ctx *ctx) +{ + if (ctx) { + g_free(ctx->cpu_extensions); + g_free(ctx); + } +} + +void rspamd_cryptobox_keypair(rspamd_pk_t pk, rspamd_sk_t sk, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + ottery_rand_bytes(sk, rspamd_cryptobox_MAX_SKBYTES); + sk[0] &= 248; + sk[31] &= 127; + sk[31] |= 64; + + crypto_scalarmult_base(pk, sk); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EC_KEY *ec_sec; + const BIGNUM *bn_sec; + + const EC_POINT *ec_pub; + gsize len; + + ec_sec = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID); + g_assert(ec_sec != NULL); + g_assert(EC_KEY_generate_key(ec_sec) != 0); + + bn_sec = EC_KEY_get0_private_key(ec_sec); + g_assert(bn_sec != NULL); + ec_pub = EC_KEY_get0_public_key(ec_sec); + g_assert(ec_pub != NULL); +#if OPENSSL_VERSION_MAJOR >= 3 + unsigned char *buf = NULL; /* Thanks openssl for this API (no) */ + len = EC_POINT_point2buf(EC_KEY_get0_group(ec_sec), ec_pub, + POINT_CONVERSION_UNCOMPRESSED, &buf, NULL); + g_assert(len <= (gint) 
rspamd_cryptobox_pk_bytes(mode)); + memcpy(pk, buf, len); + OPENSSL_free(buf); +#else + BIGNUM *bn_pub; + bn_pub = EC_POINT_point2bn(EC_KEY_get0_group(ec_sec), + ec_pub, POINT_CONVERSION_UNCOMPRESSED, NULL, NULL); + len = BN_num_bytes(bn_pub); + g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode)); + BN_bn2bin(bn_pub, pk); + BN_free(bn_pub); +#endif + + len = BN_num_bytes(bn_sec); + g_assert(len <= (gint) sizeof(rspamd_sk_t)); + BN_bn2bin(bn_sec, sk); + + EC_KEY_free(ec_sec); +#endif + } +} + +void rspamd_cryptobox_keypair_sig(rspamd_sig_pk_t pk, rspamd_sig_sk_t sk, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_sign_keypair(pk, sk); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EC_KEY *ec_sec; + const BIGNUM *bn_sec; + const EC_POINT *ec_pub; + gsize len; + + ec_sec = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID); + g_assert(ec_sec != NULL); + g_assert(EC_KEY_generate_key(ec_sec) != 0); + + bn_sec = EC_KEY_get0_private_key(ec_sec); + g_assert(bn_sec != NULL); + ec_pub = EC_KEY_get0_public_key(ec_sec); + g_assert(ec_pub != NULL); + +#if OPENSSL_VERSION_MAJOR >= 3 + unsigned char *buf = NULL; /* Thanks openssl for this API (no) */ + len = EC_POINT_point2buf(EC_KEY_get0_group(ec_sec), ec_pub, + POINT_CONVERSION_UNCOMPRESSED, &buf, NULL); + g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode)); + memcpy(pk, buf, len); + OPENSSL_free(buf); +#else + BIGNUM *bn_pub; + bn_pub = EC_POINT_point2bn(EC_KEY_get0_group(ec_sec), + ec_pub, POINT_CONVERSION_UNCOMPRESSED, NULL, NULL); + len = BN_num_bytes(bn_pub); + g_assert(len <= (gint) rspamd_cryptobox_pk_bytes(mode)); + BN_bn2bin(bn_pub, pk); + BN_free(bn_pub); +#endif + + len = BN_num_bytes(bn_sec); + g_assert(len <= (gint) sizeof(rspamd_sk_t)); + BN_bn2bin(bn_sec, sk); + EC_KEY_free(ec_sec); +#endif + } +} + +#if OPENSSL_VERSION_MAJOR >= 3 +/* Compatibility function for OpenSSL 3.0 - thanks for breaking all API one more time */ +EC_POINT 
*ec_point_bn2point_compat(const EC_GROUP *group, + const BIGNUM *bn, EC_POINT *point, BN_CTX *ctx) +{ + size_t buf_len = 0; + unsigned char *buf; + EC_POINT *ret; + + if ((buf_len = BN_num_bytes(bn)) == 0) + buf_len = 1; + if ((buf = OPENSSL_malloc(buf_len)) == NULL) { + return NULL; + } + + if (!BN_bn2binpad(bn, buf, buf_len)) { + OPENSSL_free(buf); + return NULL; + } + + if (point == NULL) { + if ((ret = EC_POINT_new(group)) == NULL) { + OPENSSL_free(buf); + return NULL; + } + } + else + ret = point; + + if (!EC_POINT_oct2point(group, ret, buf, buf_len, ctx)) { + if (ret != point) + EC_POINT_clear_free(ret); + OPENSSL_free(buf); + return NULL; + } + + OPENSSL_free(buf); + return ret; +} +#else +#define ec_point_bn2point_compat EC_POINT_bn2point +#endif + +void rspamd_cryptobox_nm(rspamd_nm_t nm, + const rspamd_pk_t pk, const rspamd_sk_t sk, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + guchar s[32]; + guchar e[32]; + + memcpy(e, sk, 32); + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + + if (crypto_scalarmult(s, e, pk) != -1) { + hchacha(s, n0, nm, 20); + } + + rspamd_explicit_memzero(e, 32); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EC_KEY *lk; + EC_POINT *ec_pub; + BIGNUM *bn_pub, *bn_sec; + gint len; + guchar s[32]; + + lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID); + g_assert(lk != NULL); + + bn_pub = BN_bin2bn(pk, rspamd_cryptobox_pk_bytes(mode), NULL); + g_assert(bn_pub != NULL); + bn_sec = BN_bin2bn(sk, sizeof(rspamd_sk_t), NULL); + g_assert(bn_sec != NULL); + + g_assert(EC_KEY_set_private_key(lk, bn_sec) == 1); + ec_pub = ec_point_bn2point_compat(EC_KEY_get0_group(lk), bn_pub, NULL, NULL); + g_assert(ec_pub != NULL); + len = ECDH_compute_key(s, sizeof(s), ec_pub, lk, NULL); + g_assert(len == sizeof(s)); + + /* Still do hchacha iteration since we are not using SHA1 KDF */ + hchacha(s, n0, nm, 20); + + EC_KEY_free(lk); + EC_POINT_free(ec_pub); + BN_free(bn_sec); + BN_free(bn_pub); 
+#endif + } +} + +void rspamd_cryptobox_sign(guchar *sig, unsigned long long *siglen_p, + const guchar *m, gsize mlen, + const rspamd_sk_t sk, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_sign_detached(sig, siglen_p, m, mlen, sk); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EC_KEY *lk; + BIGNUM *bn_sec; + EVP_MD_CTX *sha_ctx; + unsigned char h[64]; + guint diglen = rspamd_cryptobox_signature_bytes(mode); + + /* Prehash */ + sha_ctx = EVP_MD_CTX_create(); + g_assert(EVP_DigestInit(sha_ctx, EVP_sha512()) == 1); + EVP_DigestUpdate(sha_ctx, m, mlen); + EVP_DigestFinal(sha_ctx, h, NULL); + + /* Key setup */ + lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID); + g_assert(lk != NULL); + bn_sec = BN_bin2bn(sk, sizeof(rspamd_sk_t), NULL); + g_assert(bn_sec != NULL); + g_assert(EC_KEY_set_private_key(lk, bn_sec) == 1); + + /* ECDSA */ + g_assert(ECDSA_sign(0, h, sizeof(h), sig, &diglen, lk) == 1); + g_assert(diglen <= sizeof(rspamd_signature_t)); + + if (siglen_p) { + *siglen_p = diglen; + } + + EC_KEY_free(lk); + EVP_MD_CTX_destroy(sha_ctx); + BN_free(bn_sec); +#endif + } +} + +bool rspamd_cryptobox_verify(const guchar *sig, + gsize siglen, + const guchar *m, + gsize mlen, + const rspamd_pk_t pk, + enum rspamd_cryptobox_mode mode) +{ + bool ret = false; + + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + if (siglen == rspamd_cryptobox_signature_bytes(RSPAMD_CRYPTOBOX_MODE_25519)) { + ret = (crypto_sign_verify_detached(sig, m, mlen, pk) == 0); + } + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EC_KEY *lk; + EC_POINT *ec_pub; + BIGNUM *bn_pub; + EVP_MD_CTX *sha_ctx; + unsigned char h[64]; + + /* Prehash */ + sha_ctx = EVP_MD_CTX_create(); + g_assert(EVP_DigestInit(sha_ctx, EVP_sha512()) == 1); + EVP_DigestUpdate(sha_ctx, m, mlen); + EVP_DigestFinal(sha_ctx, h, NULL); + + /* Key setup */ + lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID); + g_assert(lk != NULL); + bn_pub 
= BN_bin2bn(pk, rspamd_cryptobox_pk_bytes(mode), NULL); + g_assert(bn_pub != NULL); + ec_pub = ec_point_bn2point_compat(EC_KEY_get0_group(lk), bn_pub, NULL, NULL); + g_assert(ec_pub != NULL); + g_assert(EC_KEY_set_public_key(lk, ec_pub) == 1); + + /* ECDSA */ + ret = ECDSA_verify(0, h, sizeof(h), sig, siglen, lk) == 1; + + EC_KEY_free(lk); + EVP_MD_CTX_destroy(sha_ctx); + BN_free(bn_pub); + EC_POINT_free(ec_pub); +#endif + } + + return ret; +} + +static gsize +rspamd_cryptobox_encrypt_ctx_len(enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return sizeof(chacha_state) + CRYPTOBOX_ALIGNMENT; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + return sizeof(EVP_CIPHER_CTX *) + CRYPTOBOX_ALIGNMENT; +#endif + } + + return 0; +} + +static gsize +rspamd_cryptobox_auth_ctx_len(enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return sizeof(crypto_onetimeauth_state) + RSPAMD_ALIGNOF(crypto_onetimeauth_state); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + return sizeof(void *); +#endif + } + + return 0; +} + +static void * +rspamd_cryptobox_encrypt_init(void *enc_ctx, const rspamd_nonce_t nonce, + const rspamd_nm_t nm, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + chacha_state *s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + xchacha_init(s, + (const chacha_key *) nm, + (const chacha_iv24 *) nonce, + 20); + + return s; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + memset(s, 0, sizeof(*s)); + *s = EVP_CIPHER_CTX_new(); + g_assert(EVP_EncryptInit_ex(*s, EVP_aes_256_gcm(), NULL, NULL, NULL) == 1); + g_assert(EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_SET_IVLEN, + rspamd_cryptobox_nonce_bytes(mode), NULL) == 1); + g_assert(EVP_EncryptInit_ex(*s, NULL, NULL, nm, nonce) == 1); + + return s; +#endif + } + 
+ return NULL; +} + +static void * +rspamd_cryptobox_auth_init(void *auth_ctx, void *enc_ctx, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_onetimeauth_state *mac_ctx; + guchar RSPAMD_ALIGNED(32) subkey[CHACHA_BLOCKBYTES]; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + memset(subkey, 0, sizeof(subkey)); + chacha_update(enc_ctx, subkey, subkey, sizeof(subkey)); + crypto_onetimeauth_init(mac_ctx, subkey); + rspamd_explicit_memzero(subkey, sizeof(subkey)); + + return mac_ctx; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + auth_ctx = enc_ctx; + + return auth_ctx; +#endif + } + + return NULL; +} + +static gboolean +rspamd_cryptobox_encrypt_update(void *enc_ctx, const guchar *in, gsize inlen, + guchar *out, gsize *outlen, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + gsize r; + chacha_state *s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + + r = chacha_update(s, in, out, inlen); + + if (outlen != NULL) { + *outlen = r; + } + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = enc_ctx; + gint r; + + r = inlen; + g_assert(EVP_EncryptUpdate(*s, out, &r, in, inlen) == 1); + + if (outlen) { + *outlen = r; + } + + return TRUE; +#endif + } + + return FALSE; +} + +static gboolean +rspamd_cryptobox_auth_update(void *auth_ctx, const guchar *in, gsize inlen, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_onetimeauth_state *mac_ctx; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + crypto_onetimeauth_update(mac_ctx, in, inlen); + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + return TRUE; +#endif + } + + return FALSE; +} + +static gsize +rspamd_cryptobox_encrypt_final(void *enc_ctx, guchar *out, gsize remain, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == 
RSPAMD_CRYPTOBOX_MODE_25519)) { + chacha_state *s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + return chacha_final(s, out); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = enc_ctx; + gint r = remain; + + g_assert(EVP_EncryptFinal_ex(*s, out, &r) == 1); + + return r; +#endif + } + + return 0; +} + +static gboolean +rspamd_cryptobox_auth_final(void *auth_ctx, rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_onetimeauth_state *mac_ctx; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + crypto_onetimeauth_final(mac_ctx, sig); + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = auth_ctx; + + g_assert(EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_GET_TAG, + sizeof(rspamd_mac_t), sig) == 1); + + return TRUE; +#endif + } + + return FALSE; +} + +static void * +rspamd_cryptobox_decrypt_init(void *enc_ctx, const rspamd_nonce_t nonce, + const rspamd_nm_t nm, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + + chacha_state *s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + xchacha_init(s, + (const chacha_key *) nm, + (const chacha_iv24 *) nonce, + 20); + + return s; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + memset(s, 0, sizeof(*s)); + *s = EVP_CIPHER_CTX_new(); + g_assert(EVP_DecryptInit_ex(*s, EVP_aes_256_gcm(), NULL, NULL, NULL) == 1); + g_assert(EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_SET_IVLEN, + rspamd_cryptobox_nonce_bytes(mode), NULL) == 1); + g_assert(EVP_DecryptInit_ex(*s, NULL, NULL, nm, nonce) == 1); + + return s; +#endif + } + + return NULL; +} + +static void * +rspamd_cryptobox_auth_verify_init(void *auth_ctx, void *enc_ctx, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + 
crypto_onetimeauth_state *mac_ctx; + guchar RSPAMD_ALIGNED(32) subkey[CHACHA_BLOCKBYTES]; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + memset(subkey, 0, sizeof(subkey)); + chacha_update(enc_ctx, subkey, subkey, sizeof(subkey)); + crypto_onetimeauth_init(mac_ctx, subkey); + rspamd_explicit_memzero(subkey, sizeof(subkey)); + + return mac_ctx; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + auth_ctx = enc_ctx; + + return auth_ctx; +#endif + } + + return NULL; +} + +static gboolean +rspamd_cryptobox_decrypt_update(void *enc_ctx, const guchar *in, gsize inlen, + guchar *out, gsize *outlen, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + gsize r; + chacha_state *s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + r = chacha_update(s, in, out, inlen); + + if (outlen != NULL) { + *outlen = r; + } + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = enc_ctx; + gint r; + + r = outlen ? 
*outlen : inlen; + g_assert(EVP_DecryptUpdate(*s, out, &r, in, inlen) == 1); + + if (outlen) { + *outlen = r; + } + + return TRUE; +#endif + } +} + +static gboolean +rspamd_cryptobox_auth_verify_update(void *auth_ctx, + const guchar *in, gsize inlen, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_onetimeauth_state *mac_ctx; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + crypto_onetimeauth_update(mac_ctx, in, inlen); + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + /* We do not need to authenticate as a separate process */ + return TRUE; +#else +#endif + } + + return FALSE; +} + +static gboolean +rspamd_cryptobox_decrypt_final(void *enc_ctx, guchar *out, gsize remain, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + chacha_state *s; + + s = cryptobox_align_ptr(enc_ctx, CRYPTOBOX_ALIGNMENT); + chacha_final(s, out); + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = enc_ctx; + gint r = remain; + + if (EVP_DecryptFinal_ex(*s, out, &r) < 0) { + return FALSE; + } + + return TRUE; +#endif + } + + return FALSE; +} + +static gboolean +rspamd_cryptobox_auth_verify_final(void *auth_ctx, const rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + rspamd_mac_t mac; + crypto_onetimeauth_state *mac_ctx; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + crypto_onetimeauth_final(mac_ctx, mac); + + if (crypto_verify_16(mac, sig) != 0) { + return FALSE; + } + + return TRUE; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = auth_ctx; + + if (EVP_CIPHER_CTX_ctrl(*s, EVP_CTRL_GCM_SET_TAG, 16, (guchar *) sig) != 1) { + return FALSE; + } + + return TRUE; +#endif + } + + return FALSE; +} + + +static void +rspamd_cryptobox_cleanup(void *enc_ctx, void *auth_ctx, + enum rspamd_cryptobox_mode mode) 
+{ + if (G_LIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + crypto_onetimeauth_state *mac_ctx; + + mac_ctx = cryptobox_align_ptr(auth_ctx, CRYPTOBOX_ALIGNMENT); + rspamd_explicit_memzero(mac_ctx, sizeof(*mac_ctx)); + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + EVP_CIPHER_CTX **s = enc_ctx; + + EVP_CIPHER_CTX_cleanup(*s); + EVP_CIPHER_CTX_free(*s); +#endif + } +} + +void rspamd_cryptobox_encrypt_nm_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_nm_t nm, + rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + gsize r; + void *enc_ctx, *auth_ctx; + + enc_ctx = g_alloca(rspamd_cryptobox_encrypt_ctx_len(mode)); + auth_ctx = g_alloca(rspamd_cryptobox_auth_ctx_len(mode)); + + enc_ctx = rspamd_cryptobox_encrypt_init(enc_ctx, nonce, nm, mode); + auth_ctx = rspamd_cryptobox_auth_init(auth_ctx, enc_ctx, mode); + + rspamd_cryptobox_encrypt_update(enc_ctx, data, len, data, &r, mode); + rspamd_cryptobox_encrypt_final(enc_ctx, data + r, len - r, mode); + + rspamd_cryptobox_auth_update(auth_ctx, data, len, mode); + rspamd_cryptobox_auth_final(auth_ctx, sig, mode); + + rspamd_cryptobox_cleanup(enc_ctx, auth_ctx, mode); +} + +static void +rspamd_cryptobox_flush_outbuf(struct rspamd_cryptobox_segment *st, + const guchar *buf, gsize len, gsize offset) +{ + gsize cpy_len; + + while (len > 0) { + cpy_len = MIN(len, st->len - offset); + memcpy(st->data + offset, buf, cpy_len); + st++; + buf += cpy_len; + len -= cpy_len; + offset = 0; + } +} + +void rspamd_cryptobox_encryptv_nm_inplace(struct rspamd_cryptobox_segment *segments, + gsize cnt, + const rspamd_nonce_t nonce, + const rspamd_nm_t nm, rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + struct rspamd_cryptobox_segment *cur = segments, *start_seg = segments; + guchar outbuf[CHACHA_BLOCKBYTES * 16]; + void *enc_ctx, *auth_ctx; + guchar *out, *in; + gsize r, remain, inremain, seg_offset; + + enc_ctx = g_alloca(rspamd_cryptobox_encrypt_ctx_len(mode)); + auth_ctx = 
g_alloca(rspamd_cryptobox_auth_ctx_len(mode)); + + enc_ctx = rspamd_cryptobox_encrypt_init(enc_ctx, nonce, nm, mode); + auth_ctx = rspamd_cryptobox_auth_init(auth_ctx, enc_ctx, mode); + + remain = sizeof(outbuf); + out = outbuf; + inremain = cur->len; + seg_offset = 0; + + for (;;) { + if (cur - segments == (gint) cnt) { + break; + } + + if (cur->len <= remain) { + memcpy(out, cur->data, cur->len); + remain -= cur->len; + out += cur->len; + cur++; + + if (remain == 0) { + rspamd_cryptobox_encrypt_update(enc_ctx, outbuf, sizeof(outbuf), + outbuf, NULL, mode); + rspamd_cryptobox_auth_update(auth_ctx, outbuf, sizeof(outbuf), + mode); + rspamd_cryptobox_flush_outbuf(start_seg, outbuf, + sizeof(outbuf), seg_offset); + start_seg = cur; + seg_offset = 0; + remain = sizeof(outbuf); + out = outbuf; + } + } + else { + memcpy(out, cur->data, remain); + rspamd_cryptobox_encrypt_update(enc_ctx, outbuf, sizeof(outbuf), + outbuf, NULL, mode); + rspamd_cryptobox_auth_update(auth_ctx, outbuf, sizeof(outbuf), + mode); + rspamd_cryptobox_flush_outbuf(start_seg, outbuf, sizeof(outbuf), + seg_offset); + seg_offset = 0; + + inremain = cur->len - remain; + in = cur->data + remain; + out = outbuf; + remain = 0; + start_seg = cur; + + while (inremain > 0) { + if (sizeof(outbuf) <= inremain) { + memcpy(outbuf, in, sizeof(outbuf)); + rspamd_cryptobox_encrypt_update(enc_ctx, + outbuf, + sizeof(outbuf), + outbuf, + NULL, + mode); + rspamd_cryptobox_auth_update(auth_ctx, + outbuf, + sizeof(outbuf), + mode); + memcpy(in, outbuf, sizeof(outbuf)); + in += sizeof(outbuf); + inremain -= sizeof(outbuf); + remain = sizeof(outbuf); + } + else { + memcpy(outbuf, in, inremain); + remain = sizeof(outbuf) - inremain; + out = outbuf + inremain; + inremain = 0; + } + } + + seg_offset = cur->len - (sizeof(outbuf) - remain); + cur++; + } + } + + rspamd_cryptobox_encrypt_update(enc_ctx, outbuf, sizeof(outbuf) - remain, + outbuf, &r, mode); + out = outbuf + r; + rspamd_cryptobox_encrypt_final(enc_ctx, out, 
sizeof(outbuf) - remain - r, + mode); + + rspamd_cryptobox_auth_update(auth_ctx, outbuf, sizeof(outbuf) - remain, + mode); + rspamd_cryptobox_auth_final(auth_ctx, sig, mode); + + rspamd_cryptobox_flush_outbuf(start_seg, outbuf, sizeof(outbuf) - remain, + seg_offset); + rspamd_cryptobox_cleanup(enc_ctx, auth_ctx, mode); +} + +gboolean +rspamd_cryptobox_decrypt_nm_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, const rspamd_nm_t nm, + const rspamd_mac_t sig, enum rspamd_cryptobox_mode mode) +{ + gsize r = 0; + gboolean ret = TRUE; + void *enc_ctx, *auth_ctx; + + enc_ctx = g_alloca(rspamd_cryptobox_encrypt_ctx_len(mode)); + auth_ctx = g_alloca(rspamd_cryptobox_auth_ctx_len(mode)); + + enc_ctx = rspamd_cryptobox_decrypt_init(enc_ctx, nonce, nm, mode); + auth_ctx = rspamd_cryptobox_auth_verify_init(auth_ctx, enc_ctx, mode); + + rspamd_cryptobox_auth_verify_update(auth_ctx, data, len, mode); + + if (!rspamd_cryptobox_auth_verify_final(auth_ctx, sig, mode)) { + ret = FALSE; + } + else { + rspamd_cryptobox_decrypt_update(enc_ctx, data, len, data, &r, mode); + ret = rspamd_cryptobox_decrypt_final(enc_ctx, data + r, len - r, mode); + } + + rspamd_cryptobox_cleanup(enc_ctx, auth_ctx, mode); + + return ret; +} + +gboolean +rspamd_cryptobox_decrypt_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_pk_t pk, const rspamd_sk_t sk, + const rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + guchar nm[rspamd_cryptobox_MAX_NMBYTES]; + gboolean ret; + + rspamd_cryptobox_nm(nm, pk, sk, mode); + ret = rspamd_cryptobox_decrypt_nm_inplace(data, len, nonce, nm, sig, mode); + + rspamd_explicit_memzero(nm, sizeof(nm)); + + return ret; +} + +void rspamd_cryptobox_encrypt_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_pk_t pk, const rspamd_sk_t sk, + rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + guchar nm[rspamd_cryptobox_MAX_NMBYTES]; + + rspamd_cryptobox_nm(nm, pk, sk, mode); + 
rspamd_cryptobox_encrypt_nm_inplace(data, len, nonce, nm, sig, mode); + rspamd_explicit_memzero(nm, sizeof(nm)); +} + +void rspamd_cryptobox_encryptv_inplace(struct rspamd_cryptobox_segment *segments, + gsize cnt, + const rspamd_nonce_t nonce, + const rspamd_pk_t pk, const rspamd_sk_t sk, + rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode) +{ + guchar nm[rspamd_cryptobox_MAX_NMBYTES]; + + rspamd_cryptobox_nm(nm, pk, sk, mode); + rspamd_cryptobox_encryptv_nm_inplace(segments, cnt, nonce, nm, sig, mode); + rspamd_explicit_memzero(nm, sizeof(nm)); +} + + +void rspamd_cryptobox_siphash(unsigned char *out, const unsigned char *in, + unsigned long long inlen, + const rspamd_sipkey_t k) +{ + crypto_shorthash_siphash24(out, in, inlen, k); +} + +/* + * Password-Based Key Derivation Function 2 (PKCS #5 v2.0). + * Code based on IEEE Std 802.11-2007, Annex H.4.2. + */ +static gboolean +rspamd_cryptobox_pbkdf2(const char *pass, gsize pass_len, + const guint8 *salt, gsize salt_len, guint8 *key, gsize key_len, + unsigned int rounds) +{ + guint8 *asalt, obuf[crypto_generichash_blake2b_BYTES_MAX]; + guint8 d1[crypto_generichash_blake2b_BYTES_MAX], + d2[crypto_generichash_blake2b_BYTES_MAX]; + unsigned int i, j; + unsigned int count; + gsize r; + + if (rounds < 1 || key_len == 0) { + return FALSE; + } + if (salt_len == 0 || salt_len > G_MAXSIZE - 4) { + return FALSE; + } + + asalt = g_malloc(salt_len + 4); + memcpy(asalt, salt, salt_len); + + for (count = 1; key_len > 0; count++) { + asalt[salt_len + 0] = (count >> 24) & 0xff; + asalt[salt_len + 1] = (count >> 16) & 0xff; + asalt[salt_len + 2] = (count >> 8) & 0xff; + asalt[salt_len + 3] = count & 0xff; + + if (pass_len <= crypto_generichash_blake2b_KEYBYTES_MAX) { + crypto_generichash_blake2b(d1, sizeof(d1), asalt, salt_len + 4, + pass, pass_len); + } + else { + guint8 k[crypto_generichash_blake2b_BYTES_MAX]; + + /* + * We use additional blake2 iteration to store large key + * XXX: it is not compatible with the original 
implementation but safe + */ + crypto_generichash_blake2b(k, sizeof(k), pass, pass_len, + NULL, 0); + crypto_generichash_blake2b(d1, sizeof(d1), asalt, salt_len + 4, + k, sizeof(k)); + } + + memcpy(obuf, d1, sizeof(obuf)); + + for (i = 1; i < rounds; i++) { + if (pass_len <= crypto_generichash_blake2b_KEYBYTES_MAX) { + crypto_generichash_blake2b(d2, sizeof(d2), d1, sizeof(d1), + pass, pass_len); + } + else { + guint8 k[crypto_generichash_blake2b_BYTES_MAX]; + + /* + * We use additional blake2 iteration to store large key + * XXX: it is not compatible with the original implementation but safe + */ + crypto_generichash_blake2b(k, sizeof(k), pass, pass_len, + NULL, 0); + crypto_generichash_blake2b(d2, sizeof(d2), d1, sizeof(d1), + k, sizeof(k)); + } + + memcpy(d1, d2, sizeof(d1)); + + for (j = 0; j < sizeof(obuf); j++) { + obuf[j] ^= d1[j]; + } + } + + r = MIN(key_len, crypto_generichash_blake2b_BYTES_MAX); + memcpy(key, obuf, r); + key += r; + key_len -= r; + } + + rspamd_explicit_memzero(asalt, salt_len + 4); + g_free(asalt); + rspamd_explicit_memzero(d1, sizeof(d1)); + rspamd_explicit_memzero(d2, sizeof(d2)); + rspamd_explicit_memzero(obuf, sizeof(obuf)); + + return TRUE; +} + +gboolean +rspamd_cryptobox_pbkdf(const char *pass, gsize pass_len, + const guint8 *salt, gsize salt_len, guint8 *key, gsize key_len, + unsigned int complexity, enum rspamd_cryptobox_pbkdf_type type) +{ + gboolean ret = FALSE; + + switch (type) { + case RSPAMD_CRYPTOBOX_CATENA: + if (catena(pass, pass_len, salt, salt_len, "rspamd", 6, + 4, complexity, complexity, key_len, key) == 0) { + ret = TRUE; + } + break; + case RSPAMD_CRYPTOBOX_PBKDF2: + default: + ret = rspamd_cryptobox_pbkdf2(pass, pass_len, salt, salt_len, key, + key_len, complexity); + break; + } + + return ret; +} + +guint rspamd_cryptobox_pk_bytes(enum rspamd_cryptobox_mode mode) +{ + if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return 32; + } + else { + return 65; + } +} + +guint rspamd_cryptobox_pk_sig_bytes(enum 
rspamd_cryptobox_mode mode) +{ + if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return 32; + } + else { + return 65; + } +} + +guint rspamd_cryptobox_nonce_bytes(enum rspamd_cryptobox_mode mode) +{ + if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return 24; + } + else { + return 16; + } +} + + +guint rspamd_cryptobox_sk_bytes(enum rspamd_cryptobox_mode mode) +{ + return 32; +} + +guint rspamd_cryptobox_sk_sig_bytes(enum rspamd_cryptobox_mode mode) +{ + if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return 64; + } + else { + return 32; + } +} + +guint rspamd_cryptobox_signature_bytes(enum rspamd_cryptobox_mode mode) +{ + static guint ssl_keylen; + + if (G_UNLIKELY(mode == RSPAMD_CRYPTOBOX_MODE_25519)) { + return 64; + } + else { +#ifndef HAVE_USABLE_OPENSSL + g_assert(0); +#else + if (ssl_keylen == 0) { + EC_KEY *lk; + lk = EC_KEY_new_by_curve_name(CRYPTOBOX_CURVE_NID); + ssl_keylen = ECDSA_size(lk); + EC_KEY_free(lk); + } +#endif + return ssl_keylen; + } +} + +guint rspamd_cryptobox_nm_bytes(enum rspamd_cryptobox_mode mode) +{ + return 32; +} + +guint rspamd_cryptobox_mac_bytes(enum rspamd_cryptobox_mode mode) +{ + return 16; +} + +void rspamd_cryptobox_hash_init(rspamd_cryptobox_hash_state_t *p, const guchar *key, gsize keylen) +{ + crypto_generichash_blake2b_state *st = cryptobox_align_ptr(p, + RSPAMD_ALIGNOF(crypto_generichash_blake2b_state)); + crypto_generichash_blake2b_init(st, key, keylen, + crypto_generichash_blake2b_BYTES_MAX); +} + +/** + * Update hash with data portion + */ +void rspamd_cryptobox_hash_update(rspamd_cryptobox_hash_state_t *p, const guchar *data, gsize len) +{ + crypto_generichash_blake2b_state *st = cryptobox_align_ptr(p, + RSPAMD_ALIGNOF(crypto_generichash_blake2b_state)); + crypto_generichash_blake2b_update(st, data, len); +} + +/** + * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length + */ +void rspamd_cryptobox_hash_final(rspamd_cryptobox_hash_state_t *p, guchar *out) +{ + 
crypto_generichash_blake2b_state *st = cryptobox_align_ptr(p, + RSPAMD_ALIGNOF(crypto_generichash_blake2b_state)); + crypto_generichash_blake2b_final(st, out, crypto_generichash_blake2b_BYTES_MAX); +} + +/** + * One in all function + */ +void rspamd_cryptobox_hash(guchar *out, + const guchar *data, + gsize len, + const guchar *key, + gsize keylen) +{ + crypto_generichash_blake2b(out, crypto_generichash_blake2b_BYTES_MAX, + data, len, key, keylen); +} + +G_STATIC_ASSERT(sizeof(t1ha_context_t) <= + sizeof(((rspamd_cryptobox_fast_hash_state_t *) NULL)->opaque)); +G_STATIC_ASSERT(sizeof(struct XXH3_state_s) <= + sizeof(((rspamd_cryptobox_fast_hash_state_t *) NULL)->opaque)); + + +struct RSPAMD_ALIGNED(16) _mum_iuf { + union { + gint64 ll; + unsigned char b[sizeof(guint64)]; + } buf; + gint64 h; + unsigned rem; +}; + +rspamd_cryptobox_fast_hash_state_t * +rspamd_cryptobox_fast_hash_new(void) +{ + rspamd_cryptobox_fast_hash_state_t *nst; + int ret = posix_memalign((void **) &nst, RSPAMD_ALIGNOF(rspamd_cryptobox_fast_hash_state_t), + sizeof(rspamd_cryptobox_fast_hash_state_t)); + + if (ret != 0) { + abort(); + } + + return nst; +} + +void rspamd_cryptobox_fast_hash_free(rspamd_cryptobox_fast_hash_state_t *st) +{ + free(st); +} + +void rspamd_cryptobox_fast_hash_init(rspamd_cryptobox_fast_hash_state_t *st, + guint64 seed) +{ + XXH3_state_t *xst = (XXH3_state_t *) st->opaque; + st->type = RSPAMD_CRYPTOBOX_XXHASH3; + XXH3_INITSTATE(xst); + XXH3_64bits_reset_withSeed(xst, seed); +} + +void rspamd_cryptobox_fast_hash_init_specific(rspamd_cryptobox_fast_hash_state_t *st, + enum rspamd_cryptobox_fast_hash_type type, + guint64 seed) +{ + switch (type) { + case RSPAMD_CRYPTOBOX_T1HA: + case RSPAMD_CRYPTOBOX_HASHFAST: + case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: { + t1ha_context_t *rst = (t1ha_context_t *) st->opaque; + st->type = RSPAMD_CRYPTOBOX_T1HA; + t1ha2_init(rst, seed, 0); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH64: { + XXH64_state_t *xst = (XXH64_state_t *) st->opaque; 
+ memset(xst, 0, sizeof(*xst)); + st->type = RSPAMD_CRYPTOBOX_XXHASH64; + XXH64_reset(xst, seed); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH32: { + XXH32_state_t *xst = (XXH32_state_t *) st->opaque; + memset(xst, 0, sizeof(*xst)); + st->type = RSPAMD_CRYPTOBOX_XXHASH32; + XXH32_reset(xst, seed); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH3: { + XXH3_state_t *xst = (XXH3_state_t *) st->opaque; + XXH3_INITSTATE(xst); + st->type = RSPAMD_CRYPTOBOX_XXHASH3; + XXH3_64bits_reset_withSeed(xst, seed); + break; + } + case RSPAMD_CRYPTOBOX_MUMHASH: { + struct _mum_iuf *iuf = (struct _mum_iuf *) st->opaque; + st->type = RSPAMD_CRYPTOBOX_MUMHASH; + iuf->h = seed; + iuf->buf.ll = 0; + iuf->rem = 0; + break; + } + } +} + +void rspamd_cryptobox_fast_hash_update(rspamd_cryptobox_fast_hash_state_t *st, + const void *data, gsize len) +{ + if (st->type == RSPAMD_CRYPTOBOX_T1HA) { + t1ha_context_t *rst = (t1ha_context_t *) st->opaque; + t1ha2_update(rst, data, len); + } + else { + switch (st->type) { + case RSPAMD_CRYPTOBOX_XXHASH64: { + XXH64_state_t *xst = (XXH64_state_t *) st->opaque; + XXH64_update(xst, data, len); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH32: { + XXH32_state_t *xst = (XXH32_state_t *) st->opaque; + XXH32_update(xst, data, len); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH3: { + XXH3_state_t *xst = (XXH3_state_t *) st->opaque; + XXH3_64bits_update(xst, data, len); + break; + } + case RSPAMD_CRYPTOBOX_MUMHASH: { + struct _mum_iuf *iuf = (struct _mum_iuf *) st->opaque; + gsize drem = len; + const guchar *p = data; + + if (iuf->rem > 0) { + /* Process remainder */ + if (drem >= iuf->rem) { + memcpy(iuf->buf.b + sizeof(iuf->buf.ll) - iuf->rem, + p, iuf->rem); + drem -= iuf->rem; + p += iuf->rem; + iuf->h = mum_hash_step(iuf->h, iuf->buf.ll); + iuf->rem = 0; + } + else { + memcpy(iuf->buf.b + sizeof(iuf->buf.ll) - iuf->rem, p, drem); + iuf->rem -= drem; + drem = 0; + } + } + + while (drem >= sizeof(iuf->buf.ll)) { + memcpy(iuf->buf.b, p, sizeof(iuf->buf.ll)); + iuf->h 
= mum_hash_step(iuf->h, iuf->buf.ll); + drem -= sizeof(iuf->buf.ll); + p += sizeof(iuf->buf.ll); + } + + /* Leftover */ + if (drem > 0) { + iuf->rem = sizeof(guint64) - drem; + iuf->buf.ll = 0; + memcpy(iuf->buf.b, p, drem); + } + break; + } + case RSPAMD_CRYPTOBOX_T1HA: + case RSPAMD_CRYPTOBOX_HASHFAST: + case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: { + t1ha_context_t *rst = (t1ha_context_t *) st->opaque; + t1ha2_update(rst, data, len); + break; + } + } + } +} + +guint64 +rspamd_cryptobox_fast_hash_final(rspamd_cryptobox_fast_hash_state_t *st) +{ + guint64 ret; + + if (st->type == RSPAMD_CRYPTOBOX_T1HA) { + t1ha_context_t *rst = (t1ha_context_t *) st->opaque; + + return t1ha2_final(rst, NULL); + } + else { + switch (st->type) { + case RSPAMD_CRYPTOBOX_XXHASH64: { + XXH64_state_t *xst = (XXH64_state_t *) st->opaque; + ret = XXH64_digest(xst); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH32: { + XXH32_state_t *xst = (XXH32_state_t *) st->opaque; + ret = XXH32_digest(xst); + break; + } + case RSPAMD_CRYPTOBOX_XXHASH3: { + XXH3_state_t *xst = (XXH3_state_t *) st->opaque; + ret = XXH3_64bits_digest(xst); + break; + } + case RSPAMD_CRYPTOBOX_MUMHASH: { + struct _mum_iuf *iuf = (struct _mum_iuf *) st->opaque; + iuf->h = mum_hash_step(iuf->h, iuf->buf.ll); + ret = mum_hash_finish(iuf->h); + break; + } + case RSPAMD_CRYPTOBOX_T1HA: + case RSPAMD_CRYPTOBOX_HASHFAST: + case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: { + t1ha_context_t *rst = (t1ha_context_t *) st->opaque; + + ret = t1ha2_final(rst, NULL); + break; + } + } + } + + return ret; +} + +/** + * One in all function + */ +static inline guint64 +rspamd_cryptobox_fast_hash_machdep(const void *data, + gsize len, guint64 seed) +{ + return XXH3_64bits_withSeed(data, len, seed); +} + +static inline guint64 +rspamd_cryptobox_fast_hash_indep(const void *data, + gsize len, guint64 seed) +{ + return XXH3_64bits_withSeed(data, len, seed); +} + +guint64 +rspamd_cryptobox_fast_hash(const void *data, + gsize len, guint64 seed) +{ + 
return rspamd_cryptobox_fast_hash_machdep(data, len, seed); +} + +guint64 +rspamd_cryptobox_fast_hash_specific( + enum rspamd_cryptobox_fast_hash_type type, + const void *data, + gsize len, guint64 seed) +{ + switch (type) { + case RSPAMD_CRYPTOBOX_XXHASH32: + return XXH32(data, len, seed); + case RSPAMD_CRYPTOBOX_XXHASH3: + return XXH3_64bits_withSeed(data, len, seed); + case RSPAMD_CRYPTOBOX_XXHASH64: + return XXH64(data, len, seed); + case RSPAMD_CRYPTOBOX_MUMHASH: + return mum_hash(data, len, seed); + case RSPAMD_CRYPTOBOX_T1HA: + return t1ha2_atonce(data, len, seed); + case RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT: + return rspamd_cryptobox_fast_hash_indep(data, len, seed); + case RSPAMD_CRYPTOBOX_HASHFAST: + default: + return rspamd_cryptobox_fast_hash_machdep(data, len, seed); + } +} diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h new file mode 100644 index 0000000..8cd79bb --- /dev/null +++ b/src/libcryptobox/cryptobox.h @@ -0,0 +1,437 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef CRYPTOBOX_H_ +#define CRYPTOBOX_H_ + +#include "config.h" + +#include <sodium.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_cryptobox_segment { + guchar *data; + gsize len; +}; + +#if defined(__GNUC__) && \ + ((defined(__clang__) && (__clang_major__ >= 4 || (__clang_major__ >= 3 && __clang_minor__ >= 8))) || \ + ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8) || (__GNUC__ > 4))) +#define RSPAMD_HAS_TARGET_ATTR 1 +#endif + +#define rspamd_cryptobox_MAX_NONCEBYTES 24 +#define rspamd_cryptobox_MAX_PKBYTES 65 +#define rspamd_cryptobox_MAX_SKBYTES 32 +#define rspamd_cryptobox_MAX_MACBYTES 16 +#define rspamd_cryptobox_MAX_NMBYTES 32 +#define rspamd_cryptobox_SIPKEYBYTES 16 +#define rspamd_cryptobox_HASHBYTES 64 +#define rspamd_cryptobox_HASHKEYBYTES 64 +#define rspamd_cryptobox_HASHSTATEBYTES sizeof(crypto_generichash_blake2b_state) + 64 +#define rspamd_cryptobox_MAX_SIGSKBYTES 64 +#define rspamd_cryptobox_MAX_SIGPKBYTES 32 +#define rspamd_cryptobox_MAX_SIGBYTES 72 + +#define CPUID_AVX2 0x1 +#define CPUID_AVX 0x2 +#define CPUID_SSE2 0x4 +#define CPUID_SSE3 0x8 +#define CPUID_SSSE3 0x10 +#define CPUID_SSE41 0x20 +#define CPUID_SSE42 0x40 +#define CPUID_RDRAND 0x80 + +typedef guchar rspamd_pk_t[rspamd_cryptobox_MAX_PKBYTES]; +typedef guchar rspamd_sk_t[rspamd_cryptobox_MAX_SKBYTES]; +typedef guchar rspamd_mac_t[rspamd_cryptobox_MAX_MACBYTES]; +typedef guchar rspamd_nm_t[rspamd_cryptobox_MAX_NMBYTES]; +typedef guchar rspamd_nonce_t[rspamd_cryptobox_MAX_NONCEBYTES]; +typedef guchar rspamd_sipkey_t[rspamd_cryptobox_SIPKEYBYTES]; +typedef guchar rspamd_signature_t[rspamd_cryptobox_MAX_SIGBYTES]; +typedef guchar rspamd_sig_pk_t[rspamd_cryptobox_MAX_SIGPKBYTES]; +typedef guchar rspamd_sig_sk_t[rspamd_cryptobox_MAX_SIGSKBYTES]; + +enum rspamd_cryptobox_mode { + RSPAMD_CRYPTOBOX_MODE_25519 = 0, + RSPAMD_CRYPTOBOX_MODE_NIST +}; + +struct rspamd_cryptobox_library_ctx { + gchar *cpu_extensions; + const gchar *chacha20_impl; + const gchar *base64_impl; + 
unsigned long cpu_config; +}; + +/** + * Init cryptobox library + */ +struct rspamd_cryptobox_library_ctx *rspamd_cryptobox_init(void); + +void rspamd_cryptobox_deinit(struct rspamd_cryptobox_library_ctx *); +/** + * Generate new keypair + * @param pk public key buffer + * @param sk secret key buffer + */ +void rspamd_cryptobox_keypair(rspamd_pk_t pk, rspamd_sk_t sk, + enum rspamd_cryptobox_mode mode); + +/** + * Generate new keypair for signing + * @param pk public key buffer + * @param sk secret key buffer + */ +void rspamd_cryptobox_keypair_sig(rspamd_sig_pk_t pk, rspamd_sig_sk_t sk, + enum rspamd_cryptobox_mode mode); + +/** + * Encrypt data inplace adding signature to sig afterwards + * @param data input buffer + * @param pk remote pubkey + * @param sk local secret key + * @param sig output signature + */ +void rspamd_cryptobox_encrypt_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_pk_t pk, const rspamd_sk_t sk, rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode); + +/** + * Encrypt segments of data inplace adding signature to sig afterwards + * @param segments segments of data + * @param cnt count of segments + * @param pk remote pubkey + * @param sk local secret key + * @param sig output signature + */ +void rspamd_cryptobox_encryptv_inplace(struct rspamd_cryptobox_segment *segments, + gsize cnt, + const rspamd_nonce_t nonce, + const rspamd_pk_t pk, const rspamd_sk_t sk, rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode); + + +/** + * Decrypt and verify data chunk inplace + * @param data data to decrypt + * @param len length of data + * @param pk remote pubkey + * @param sk local privkey + * @param sig signature input + * @return TRUE if input has been verified successfully + */ +gboolean rspamd_cryptobox_decrypt_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_pk_t pk, const rspamd_sk_t sk, const rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode); + +/** + * Encrypt segments of data inplace 
adding signature to sig afterwards + * @param segments segments of data + * @param cnt count of segments + * @param pk remote pubkey + * @param sk local secret key + * @param sig output signature + */ +void rspamd_cryptobox_encrypt_nm_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_nm_t nm, rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode); + +/** + * Encrypt segments of data inplace adding signature to sig afterwards + * @param segments segments of data + * @param cnt count of segments + * @param pk remote pubkey + * @param sk local secret key + * @param sig output signature + */ +void rspamd_cryptobox_encryptv_nm_inplace(struct rspamd_cryptobox_segment *segments, + gsize cnt, + const rspamd_nonce_t nonce, + const rspamd_nm_t nm, rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode); + + +/** + * Decrypt and verify data chunk inplace + * @param data data to decrypt + * @param len length of data + * @param pk remote pubkey + * @param sk local privkey + * @param sig signature input + * @return TRUE if input has been verified successfully + */ +gboolean rspamd_cryptobox_decrypt_nm_inplace(guchar *data, gsize len, + const rspamd_nonce_t nonce, + const rspamd_nm_t nm, const rspamd_mac_t sig, + enum rspamd_cryptobox_mode mode); + +/** + * Generate shared secret from local sk and remote pk + * @param nm shared secret + * @param pk remote pubkey + * @param sk local privkey + */ +void rspamd_cryptobox_nm(rspamd_nm_t nm, const rspamd_pk_t pk, + const rspamd_sk_t sk, enum rspamd_cryptobox_mode mode); + +/** + * Create digital signature for the specified message and place result in `sig` + * @param sig signature target + * @param siglen_p pointer to signature length (might be NULL) + * @param m input message + * @param mlen input length + * @param sk secret key + */ +void rspamd_cryptobox_sign(guchar *sig, unsigned long long *siglen_p, + const guchar *m, gsize mlen, + const rspamd_sk_t sk, + enum rspamd_cryptobox_mode mode); + +/** + * Verifies 
digital signature for the specified message using the specified + * pubkey + * @param sig signature source + * @param m input message + * @param mlen message length + * @param pk public key for verification + * @return true if signature is valid, false otherwise + */ +bool rspamd_cryptobox_verify(const guchar *sig, + gsize siglen, + const guchar *m, + gsize mlen, + const rspamd_pk_t pk, + enum rspamd_cryptobox_mode mode); + +/** + * Securely clear the buffer specified + * @param buf buffer to zero + * @param buflen length of buffer + */ + +#define rspamd_explicit_memzero sodium_memzero + +/** + * Constant time memcmp + * @param b1_ + * @param b2_ + * @param len + * @return + */ +#define rspamd_cryptobox_memcmp sodium_memcmp + +/** + * Calculates siphash-2-4 for a message + * @param out (8 bytes output) + * @param in + * @param inlen + * @param k key (must be 16 bytes) + */ +void rspamd_cryptobox_siphash(unsigned char *out, const unsigned char *in, + unsigned long long inlen, + const rspamd_sipkey_t k); + +enum rspamd_cryptobox_pbkdf_type { + RSPAMD_CRYPTOBOX_PBKDF2 = 0, + RSPAMD_CRYPTOBOX_CATENA +}; + + +/** + * Derive key from password using the specified algorithm + * @param pass input password + * @param pass_len length of the password + * @param salt input salt + * @param salt_len length of salt + * @param key output key + * @param key_len size of the key + * @param complexity empiric number of complexity (rounds for pbkdf2 and garlic for catena) + * @return TRUE in case of success and FALSE if failed + */ +gboolean rspamd_cryptobox_pbkdf(const char *pass, gsize pass_len, + const guint8 *salt, gsize salt_len, + guint8 *key, gsize key_len, + unsigned int complexity, + enum rspamd_cryptobox_pbkdf_type type); + + +/** + * Real size of rspamd cryptobox public key + */ +guint rspamd_cryptobox_pk_bytes(enum rspamd_cryptobox_mode mode); + +/** + * Real size of rspamd cryptobox signing public key + */ +guint rspamd_cryptobox_pk_sig_bytes(enum rspamd_cryptobox_mode 
mode); + +/** + * Real size of crypto nonce + */ +guint rspamd_cryptobox_nonce_bytes(enum rspamd_cryptobox_mode mode); + +/** + * Real size of rspamd cryptobox secret key + */ +guint rspamd_cryptobox_sk_bytes(enum rspamd_cryptobox_mode mode); + +/** + * Real size of rspamd cryptobox signing secret key + */ +guint rspamd_cryptobox_sk_sig_bytes(enum rspamd_cryptobox_mode mode); + +/** + * Real size of rspamd cryptobox shared key + */ +guint rspamd_cryptobox_nm_bytes(enum rspamd_cryptobox_mode mode); + +/** + * Real size of rspamd cryptobox MAC signature + */ +guint rspamd_cryptobox_mac_bytes(enum rspamd_cryptobox_mode mode); + +/** + * Real size of rspamd cryptobox digital signature + */ +guint rspamd_cryptobox_signature_bytes(enum rspamd_cryptobox_mode mode); + +/* Hash IUF interface */ +typedef crypto_generichash_blake2b_state rspamd_cryptobox_hash_state_t; + +/** + * Init cryptobox hash state using key if needed, `st` must point to the buffer + * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. 
If keylen == 0, then + * non-keyed hash is generated + */ +void rspamd_cryptobox_hash_init(rspamd_cryptobox_hash_state_t *st, + const guchar *key, gsize keylen); + +/** + * Update hash with data portion + */ +void rspamd_cryptobox_hash_update(rspamd_cryptobox_hash_state_t *st, + const guchar *data, gsize len); + +/** + * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length + */ +void rspamd_cryptobox_hash_final(rspamd_cryptobox_hash_state_t *st, guchar *out); + +/** + * One in all function + */ +void rspamd_cryptobox_hash(guchar *out, + const guchar *data, + gsize len, + const guchar *key, + gsize keylen); + +enum rspamd_cryptobox_fast_hash_type { + RSPAMD_CRYPTOBOX_XXHASH64 = 0, + RSPAMD_CRYPTOBOX_XXHASH32, + RSPAMD_CRYPTOBOX_XXHASH3, + RSPAMD_CRYPTOBOX_MUMHASH, + RSPAMD_CRYPTOBOX_T1HA, + RSPAMD_CRYPTOBOX_HASHFAST, + RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT +}; + +/* Non crypto hash IUF interface */ +typedef struct CRYPTO_ALIGN(64) rspamd_cryptobox_fast_hash_state_s { + guchar opaque[576]; /* Required for xxhash3 */ + enum rspamd_cryptobox_fast_hash_type type; +} rspamd_cryptobox_fast_hash_state_t; + + +/** + * Creates a new cryptobox state properly aligned + * @return + */ +rspamd_cryptobox_fast_hash_state_t *rspamd_cryptobox_fast_hash_new(void); +void rspamd_cryptobox_fast_hash_free(rspamd_cryptobox_fast_hash_state_t *st); + +/** + * Init cryptobox hash state using key if needed, `st` must point to the buffer + * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. If keylen == 0, then + * non-keyed hash is generated + */ +void rspamd_cryptobox_fast_hash_init(rspamd_cryptobox_fast_hash_state_t *st, + guint64 seed); + +/** + * Init cryptobox hash state using key if needed, `st` must point to the buffer + * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. 
If keylen == 0, then + * non-keyed hash is generated + */ +void rspamd_cryptobox_fast_hash_init_specific(rspamd_cryptobox_fast_hash_state_t *st, + enum rspamd_cryptobox_fast_hash_type type, + guint64 seed); + +/** + * Update hash with data portion + */ +void rspamd_cryptobox_fast_hash_update(rspamd_cryptobox_fast_hash_state_t *st, + const void *data, gsize len); + +/** + * Output hash to the buffer of rspamd_cryptobox_HASHBYTES length + */ +guint64 rspamd_cryptobox_fast_hash_final(rspamd_cryptobox_fast_hash_state_t *st); + +/** + * One in all function + */ +guint64 rspamd_cryptobox_fast_hash(const void *data, + gsize len, guint64 seed); + +/** + * Platform independent version + */ +guint64 rspamd_cryptobox_fast_hash_specific( + enum rspamd_cryptobox_fast_hash_type type, + const void *data, + gsize len, guint64 seed); + +/** + * Decode base64 using platform optimized code + * @param in + * @param inlen + * @param out + * @param outlen + * @return + */ +gboolean rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen, + guchar *out, gsize *outlen); + +/** + * Returns TRUE if data looks like a valid base64 string + * @param in + * @param inlen + * @return + */ +gboolean rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen); + +#ifdef __cplusplus +} +#endif + +#endif /* CRYPTOBOX_H_ */ diff --git a/src/libcryptobox/keypair.c b/src/libcryptobox/keypair.c new file mode 100644 index 0000000..ec7490a --- /dev/null +++ b/src/libcryptobox/keypair.c @@ -0,0 +1,1021 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "config.h" +#include "libcryptobox/keypair.h" +#include "libcryptobox/keypair_private.h" +#include "libutil/str_util.h" +#include "libutil/printf.h" +#include "contrib/libottery/ottery.h" + +const guchar encrypted_magic[7] = {'r', 'u', 'c', 'l', 'e', 'v', '1'}; + +static GQuark +rspamd_keypair_quark(void) +{ + return g_quark_from_static_string("rspamd-cryptobox-keypair"); +} + +/** + * Returns specific private key for different keypair types + */ +static void * +rspamd_cryptobox_keypair_sk(struct rspamd_cryptobox_keypair *kp, + guint *len) +{ + g_assert(kp != NULL); + + if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) { + if (kp->type == RSPAMD_KEYPAIR_KEX) { + *len = 32; + return RSPAMD_CRYPTOBOX_KEYPAIR_25519(kp)->sk; + } + else { + *len = 64; + return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_25519(kp)->sk; + } + } + else { + if (kp->type == RSPAMD_KEYPAIR_KEX) { + *len = 32; + return RSPAMD_CRYPTOBOX_KEYPAIR_NIST(kp)->sk; + } + else { + *len = 32; + return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_NIST(kp)->sk; + } + } + + /* Not reached */ + return NULL; +} + +static void * +rspamd_cryptobox_keypair_pk(struct rspamd_cryptobox_keypair *kp, + guint *len) +{ + g_assert(kp != NULL); + + if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) { + if (kp->type == RSPAMD_KEYPAIR_KEX) { + *len = 32; + return RSPAMD_CRYPTOBOX_KEYPAIR_25519(kp)->pk; + } + else { + *len = 32; + return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_25519(kp)->pk; + } + } + else { + if (kp->type == RSPAMD_KEYPAIR_KEX) { + *len = 65; + return RSPAMD_CRYPTOBOX_KEYPAIR_NIST(kp)->pk; + } + else { + *len = 65; + return RSPAMD_CRYPTOBOX_KEYPAIR_SIG_NIST(kp)->pk; + } + } + + /* Not reached */ + return NULL; +} + +static void * +rspamd_cryptobox_pubkey_pk(const struct rspamd_cryptobox_pubkey *kp, + guint *len) +{ + g_assert(kp != NULL); + + if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) { + if (kp->type == RSPAMD_KEYPAIR_KEX) { + *len = 
32; + return RSPAMD_CRYPTOBOX_PUBKEY_25519(kp)->pk; + } + else { + *len = 32; + return RSPAMD_CRYPTOBOX_PUBKEY_SIG_25519(kp)->pk; + } + } + else { + if (kp->type == RSPAMD_KEYPAIR_KEX) { + *len = 65; + return RSPAMD_CRYPTOBOX_PUBKEY_NIST(kp)->pk; + } + else { + *len = 65; + return RSPAMD_CRYPTOBOX_PUBKEY_SIG_NIST(kp)->pk; + } + } + + /* Not reached */ + return NULL; +} + +static struct rspamd_cryptobox_keypair * +rspamd_cryptobox_keypair_alloc(enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg) +{ + struct rspamd_cryptobox_keypair *kp; + guint size = 0; + + if (alg == RSPAMD_CRYPTOBOX_MODE_25519) { + if (type == RSPAMD_KEYPAIR_KEX) { + size = sizeof(struct rspamd_cryptobox_keypair_25519); + } + else { + size = sizeof(struct rspamd_cryptobox_keypair_sig_25519); + } + } + else { + if (type == RSPAMD_KEYPAIR_KEX) { + size = sizeof(struct rspamd_cryptobox_keypair_nist); + } + else { + size = sizeof(struct rspamd_cryptobox_keypair_sig_nist); + } + } + + g_assert(size >= sizeof(*kp)); + + if (posix_memalign((void **) &kp, 32, size) != 0) { + abort(); + } + + memset(kp, 0, size); + + return kp; +} + +static struct rspamd_cryptobox_pubkey * +rspamd_cryptobox_pubkey_alloc(enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg) +{ + struct rspamd_cryptobox_pubkey *pk; + guint size = 0; + + if (alg == RSPAMD_CRYPTOBOX_MODE_25519) { + if (type == RSPAMD_KEYPAIR_KEX) { + size = sizeof(struct rspamd_cryptobox_pubkey_25519); + } + else { + size = sizeof(struct rspamd_cryptobox_pubkey_sig_25519); + } + } + else { + if (type == RSPAMD_KEYPAIR_KEX) { + size = sizeof(struct rspamd_cryptobox_pubkey_nist); + } + else { + size = sizeof(struct rspamd_cryptobox_pubkey_sig_nist); + } + } + + g_assert(size >= sizeof(*pk)); + + if (posix_memalign((void **) &pk, 32, size) != 0) { + abort(); + } + + memset(pk, 0, size); + + return pk; +} + + +void rspamd_cryptobox_nm_dtor(struct rspamd_cryptobox_nm *nm) +{ + rspamd_explicit_memzero(nm->nm, 
sizeof(nm->nm)); + free(nm); +} + +void rspamd_cryptobox_keypair_dtor(struct rspamd_cryptobox_keypair *kp) +{ + void *sk; + guint len = 0; + + sk = rspamd_cryptobox_keypair_sk(kp, &len); + g_assert(sk != NULL && len > 0); + rspamd_explicit_memzero(sk, len); + + if (kp->extensions) { + ucl_object_unref(kp->extensions); + } + + /* Not g_free as kp is aligned using posix_memalign */ + free(kp); +} + +void rspamd_cryptobox_pubkey_dtor(struct rspamd_cryptobox_pubkey *p) +{ + if (p->nm) { + REF_RELEASE(p->nm); + } + + /* Not g_free as p is aligned using posix_memalign */ + free(p); +} + +struct rspamd_cryptobox_keypair * +rspamd_keypair_new(enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg) +{ + struct rspamd_cryptobox_keypair *kp; + void *pk, *sk; + guint size; + + kp = rspamd_cryptobox_keypair_alloc(type, alg); + kp->alg = alg; + kp->type = type; + + sk = rspamd_cryptobox_keypair_sk(kp, &size); + pk = rspamd_cryptobox_keypair_pk(kp, &size); + + if (type == RSPAMD_KEYPAIR_KEX) { + rspamd_cryptobox_keypair(pk, sk, alg); + } + else { + rspamd_cryptobox_keypair_sig(pk, sk, alg); + } + + rspamd_cryptobox_hash(kp->id, pk, size, NULL, 0); + + REF_INIT_RETAIN(kp, rspamd_cryptobox_keypair_dtor); + + return kp; +} + + +struct rspamd_cryptobox_keypair * +rspamd_keypair_ref(struct rspamd_cryptobox_keypair *kp) +{ + REF_RETAIN(kp); + return kp; +} + + +void rspamd_keypair_unref(struct rspamd_cryptobox_keypair *kp) +{ + REF_RELEASE(kp); +} + + +struct rspamd_cryptobox_pubkey * +rspamd_pubkey_ref(struct rspamd_cryptobox_pubkey *kp) +{ + REF_RETAIN(kp); + return kp; +} + +void rspamd_pubkey_unref(struct rspamd_cryptobox_pubkey *kp) +{ + REF_RELEASE(kp); +} + +enum rspamd_cryptobox_keypair_type +rspamd_keypair_type(struct rspamd_cryptobox_keypair *kp) +{ + g_assert(kp != NULL); + + return kp->type; +} + +enum rspamd_cryptobox_keypair_type +rspamd_pubkey_type(struct rspamd_cryptobox_pubkey *p) +{ + g_assert(p != NULL); + + return p->type; +} + + +enum 
rspamd_cryptobox_mode +rspamd_keypair_alg(struct rspamd_cryptobox_keypair *kp) +{ + g_assert(kp != NULL); + + return kp->alg; +} + +enum rspamd_cryptobox_mode +rspamd_pubkey_alg(struct rspamd_cryptobox_pubkey *p) +{ + g_assert(p != NULL); + + return p->alg; +} + +struct rspamd_cryptobox_pubkey * +rspamd_pubkey_from_base32(const gchar *b32, + gsize len, + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg) +{ + guchar *decoded; + gsize dlen, expected_len; + guint pklen; + struct rspamd_cryptobox_pubkey *pk; + guchar *pk_data; + + g_assert(b32 != NULL); + + if (len == 0) { + len = strlen(b32); + } + + decoded = rspamd_decode_base32(b32, len, &dlen, RSPAMD_BASE32_DEFAULT); + + if (decoded == NULL) { + return NULL; + } + + expected_len = (type == RSPAMD_KEYPAIR_KEX) ? rspamd_cryptobox_pk_bytes(alg) : rspamd_cryptobox_pk_sig_bytes(alg); + + if (dlen != expected_len) { + g_free(decoded); + return NULL; + } + + pk = rspamd_cryptobox_pubkey_alloc(type, alg); + REF_INIT_RETAIN(pk, rspamd_cryptobox_pubkey_dtor); + pk->alg = alg; + pk->type = type; + pk_data = rspamd_cryptobox_pubkey_pk(pk, &pklen); + + memcpy(pk_data, decoded, pklen); + g_free(decoded); + rspamd_cryptobox_hash(pk->id, pk_data, pklen, NULL, 0); + + return pk; +} + +struct rspamd_cryptobox_pubkey * +rspamd_pubkey_from_hex(const gchar *hex, + gsize len, + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg) +{ + guchar *decoded; + gsize dlen, expected_len; + guint pklen; + struct rspamd_cryptobox_pubkey *pk; + guchar *pk_data; + + g_assert(hex != NULL); + + if (len == 0) { + len = strlen(hex); + } + + dlen = len / 2; + + decoded = rspamd_decode_hex(hex, len); + + if (decoded == NULL) { + return NULL; + } + + expected_len = (type == RSPAMD_KEYPAIR_KEX) ? 
rspamd_cryptobox_pk_bytes(alg) : rspamd_cryptobox_pk_sig_bytes(alg); + + if (dlen != expected_len) { + g_free(decoded); + return NULL; + } + + pk = rspamd_cryptobox_pubkey_alloc(type, alg); + REF_INIT_RETAIN(pk, rspamd_cryptobox_pubkey_dtor); + pk->alg = alg; + pk->type = type; + pk_data = rspamd_cryptobox_pubkey_pk(pk, &pklen); + + memcpy(pk_data, decoded, pklen); + g_free(decoded); + rspamd_cryptobox_hash(pk->id, pk_data, pklen, NULL, 0); + + return pk; +} + +struct rspamd_cryptobox_pubkey * +rspamd_pubkey_from_bin(const guchar *raw, + gsize len, + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg) +{ + gsize expected_len; + guint pklen; + struct rspamd_cryptobox_pubkey *pk; + guchar *pk_data; + + g_assert(raw != NULL && len > 0); + + expected_len = (type == RSPAMD_KEYPAIR_KEX) ? rspamd_cryptobox_pk_bytes(alg) : rspamd_cryptobox_pk_sig_bytes(alg); + + if (len != expected_len) { + return NULL; + } + + pk = rspamd_cryptobox_pubkey_alloc(type, alg); + REF_INIT_RETAIN(pk, rspamd_cryptobox_pubkey_dtor); + pk->alg = alg; + pk->type = type; + pk_data = rspamd_cryptobox_pubkey_pk(pk, &pklen); + + memcpy(pk_data, raw, pklen); + rspamd_cryptobox_hash(pk->id, pk_data, pklen, NULL, 0); + + return pk; +} + + +const guchar * +rspamd_pubkey_get_nm(struct rspamd_cryptobox_pubkey *p, + struct rspamd_cryptobox_keypair *kp) +{ + g_assert(p != NULL); + + if (p->nm) { + if (memcmp(kp->id, (const guchar *) &p->nm->sk_id, sizeof(guint64)) == 0) { + return p->nm->nm; + } + + /* Wrong ID, need to recalculate */ + REF_RELEASE(p->nm); + p->nm = NULL; + } + + return NULL; +} + +const guchar * +rspamd_pubkey_calculate_nm(struct rspamd_cryptobox_pubkey *p, + struct rspamd_cryptobox_keypair *kp) +{ + g_assert(kp->alg == p->alg); + g_assert(kp->type == p->type); + g_assert(p->type == RSPAMD_KEYPAIR_KEX); + + if (p->nm == NULL) { + if (posix_memalign((void **) &p->nm, 32, sizeof(*p->nm)) != 0) { + abort(); + } + + memcpy(&p->nm->sk_id, kp->id, sizeof(guint64)); + 
REF_INIT_RETAIN(p->nm, rspamd_cryptobox_nm_dtor); + } + + if (kp->alg == RSPAMD_CRYPTOBOX_MODE_25519) { + struct rspamd_cryptobox_pubkey_25519 *rk_25519 = + RSPAMD_CRYPTOBOX_PUBKEY_25519(p); + struct rspamd_cryptobox_keypair_25519 *sk_25519 = + RSPAMD_CRYPTOBOX_KEYPAIR_25519(kp); + + rspamd_cryptobox_nm(p->nm->nm, rk_25519->pk, sk_25519->sk, p->alg); + } + else { + struct rspamd_cryptobox_pubkey_nist *rk_nist = + RSPAMD_CRYPTOBOX_PUBKEY_NIST(p); + struct rspamd_cryptobox_keypair_nist *sk_nist = + RSPAMD_CRYPTOBOX_KEYPAIR_NIST(kp); + + rspamd_cryptobox_nm(p->nm->nm, rk_nist->pk, sk_nist->sk, p->alg); + } + + return p->nm->nm; +} + +const guchar * +rspamd_keypair_get_id(struct rspamd_cryptobox_keypair *kp) +{ + g_assert(kp != NULL); + + return kp->id; +} + +const ucl_object_t * +rspamd_keypair_get_extensions(struct rspamd_cryptobox_keypair *kp) +{ + g_assert(kp != NULL); + + return kp->extensions; +} + +const guchar * +rspamd_pubkey_get_id(struct rspamd_cryptobox_pubkey *pk) +{ + g_assert(pk != NULL); + + return pk->id; +} + +const guchar * +rspamd_pubkey_get_pk(struct rspamd_cryptobox_pubkey *pk, + guint *len) +{ + guchar *ret = NULL; + guint rlen; + + ret = rspamd_cryptobox_pubkey_pk(pk, &rlen); + + if (len) { + *len = rlen; + } + + return ret; +} + +static void +rspamd_keypair_print_component(guchar *data, gsize datalen, + GString *res, guint how, const gchar *description) +{ + gint olen, b32_len; + + if (how & RSPAMD_KEYPAIR_HUMAN) { + rspamd_printf_gstring(res, "%s: ", description); + } + + if (how & RSPAMD_KEYPAIR_BASE32) { + b32_len = (datalen * 8 / 5) + 2; + g_string_set_size(res, res->len + b32_len); + res->len -= b32_len; + olen = rspamd_encode_base32_buf(data, datalen, res->str + res->len, + res->len + b32_len - 1, RSPAMD_BASE32_DEFAULT); + + if (olen > 0) { + res->len += olen; + res->str[res->len] = '\0'; + } + } + else if (how & RSPAMD_KEYPAIR_HEX) { + rspamd_printf_gstring(res, "%*xs", (gint) datalen, data); + } + else { + g_string_append_len(res, data, 
datalen); + } + + if (how & RSPAMD_KEYPAIR_HUMAN) { + g_string_append_c(res, '\n'); + } +} + +GString * +rspamd_keypair_print(struct rspamd_cryptobox_keypair *kp, guint how) +{ + GString *res; + guint len; + gpointer p; + + g_assert(kp != NULL); + + res = g_string_sized_new(63); + + if ((how & RSPAMD_KEYPAIR_PUBKEY)) { + p = rspamd_cryptobox_keypair_pk(kp, &len); + rspamd_keypair_print_component(p, len, res, how, "Public key"); + } + if ((how & RSPAMD_KEYPAIR_PRIVKEY)) { + p = rspamd_cryptobox_keypair_sk(kp, &len); + rspamd_keypair_print_component(p, len, res, how, "Private key"); + } + if ((how & RSPAMD_KEYPAIR_ID_SHORT)) { + rspamd_keypair_print_component(kp->id, RSPAMD_KEYPAIR_SHORT_ID_LEN, + res, how, "Short key ID"); + } + if ((how & RSPAMD_KEYPAIR_ID)) { + rspamd_keypair_print_component(kp->id, sizeof(kp->id), res, how, "Key ID"); + } + + return res; +} + +GString * +rspamd_pubkey_print(struct rspamd_cryptobox_pubkey *pk, guint how) +{ + GString *res; + guint len; + gpointer p; + + g_assert(pk != NULL); + + res = g_string_sized_new(63); + + if ((how & RSPAMD_KEYPAIR_PUBKEY)) { + p = rspamd_cryptobox_pubkey_pk(pk, &len); + rspamd_keypair_print_component(p, len, res, how, "Public key"); + } + if ((how & RSPAMD_KEYPAIR_ID_SHORT)) { + rspamd_keypair_print_component(pk->id, RSPAMD_KEYPAIR_SHORT_ID_LEN, + res, how, "Short key ID"); + } + if ((how & RSPAMD_KEYPAIR_ID)) { + rspamd_keypair_print_component(pk->id, sizeof(pk->id), res, how, + "Key ID"); + } + + return res; +} + +const guchar * +rspamd_keypair_component(struct rspamd_cryptobox_keypair *kp, + guint ncomp, guint *len) +{ + guint rlen = 0; + const guchar *ret = NULL; + + g_assert(kp != NULL); + + switch (ncomp) { + case RSPAMD_KEYPAIR_COMPONENT_ID: + rlen = sizeof(kp->id); + ret = kp->id; + break; + case RSPAMD_KEYPAIR_COMPONENT_PK: + ret = rspamd_cryptobox_keypair_pk(kp, &rlen); + break; + case RSPAMD_KEYPAIR_COMPONENT_SK: + ret = rspamd_cryptobox_keypair_sk(kp, &rlen); + break; + } + + if (len) { + *len = 
rlen; + } + + return ret; +} + +struct rspamd_cryptobox_keypair * +rspamd_keypair_from_ucl(const ucl_object_t *obj) +{ + const ucl_object_t *privkey, *pubkey, *elt; + const gchar *str; + enum rspamd_cryptobox_keypair_type type = RSPAMD_KEYPAIR_KEX; + enum rspamd_cryptobox_mode mode = RSPAMD_CRYPTOBOX_MODE_25519; + gboolean is_hex = FALSE; + struct rspamd_cryptobox_keypair *kp; + guint len; + gsize ucl_len; + gint dec_len; + gpointer target; + + if (ucl_object_type(obj) != UCL_OBJECT) { + return NULL; + } + + elt = ucl_object_lookup(obj, "keypair"); + if (elt != NULL) { + obj = elt; + } + + pubkey = ucl_object_lookup_any(obj, "pubkey", "public", "public_key", + NULL); + if (pubkey == NULL || ucl_object_type(pubkey) != UCL_STRING) { + return NULL; + } + + privkey = ucl_object_lookup_any(obj, "privkey", "private", "private_key", + "secret", "secret_key", NULL); + if (privkey == NULL || ucl_object_type(privkey) != UCL_STRING) { + return NULL; + } + + /* Optional fields */ + elt = ucl_object_lookup(obj, "type"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + str = ucl_object_tostring(elt); + + if (g_ascii_strcasecmp(str, "kex") == 0) { + type = RSPAMD_KEYPAIR_KEX; + } + else if (g_ascii_strcasecmp(str, "sign") == 0) { + type = RSPAMD_KEYPAIR_SIGN; + } + /* TODO: handle errors */ + } + + elt = ucl_object_lookup(obj, "algorithm"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + str = ucl_object_tostring(elt); + + if (g_ascii_strcasecmp(str, "curve25519") == 0) { + mode = RSPAMD_CRYPTOBOX_MODE_25519; + } + else if (g_ascii_strcasecmp(str, "nistp256") == 0) { + mode = RSPAMD_CRYPTOBOX_MODE_NIST; + } + /* TODO: handle errors */ + } + + elt = ucl_object_lookup(obj, "encoding"); + if (elt && ucl_object_type(elt) == UCL_STRING) { + str = ucl_object_tostring(elt); + + if (g_ascii_strcasecmp(str, "hex") == 0) { + is_hex = TRUE; + } + /* TODO: handle errors */ + } + + kp = rspamd_cryptobox_keypair_alloc(type, mode); + kp->type = type; + kp->alg = mode; + 
REF_INIT_RETAIN(kp, rspamd_cryptobox_keypair_dtor); + g_assert(kp != NULL); + + target = rspamd_cryptobox_keypair_sk(kp, &len); + str = ucl_object_tolstring(privkey, &ucl_len); + + if (is_hex) { + dec_len = rspamd_decode_hex_buf(str, ucl_len, target, len); + } + else { + dec_len = rspamd_decode_base32_buf(str, ucl_len, target, len, RSPAMD_BASE32_DEFAULT); + } + + if (dec_len != (gint) len) { + rspamd_keypair_unref(kp); + + return NULL; + } + + target = rspamd_cryptobox_keypair_pk(kp, &len); + str = ucl_object_tolstring(pubkey, &ucl_len); + + if (is_hex) { + dec_len = rspamd_decode_hex_buf(str, ucl_len, target, len); + } + else { + dec_len = rspamd_decode_base32_buf(str, ucl_len, target, len, RSPAMD_BASE32_DEFAULT); + } + + if (dec_len != (gint) len) { + rspamd_keypair_unref(kp); + + return NULL; + } + + rspamd_cryptobox_hash(kp->id, target, len, NULL, 0); + + elt = ucl_object_lookup(obj, "extensions"); + if (elt && ucl_object_type(elt) == UCL_OBJECT) { + /* Use copy to avoid issues with the refcounts */ + kp->extensions = ucl_object_copy(elt); + } + + return kp; +} + +ucl_object_t * +rspamd_keypair_to_ucl(struct rspamd_cryptobox_keypair *kp, + enum rspamd_keypair_dump_flags flags) +{ + ucl_object_t *ucl_out, *elt; + gint how = 0; + GString *keypair_out; + const gchar *encoding; + + g_assert(kp != NULL); + + if (flags & RSPAMD_KEYPAIR_DUMP_HEX) { + how |= RSPAMD_KEYPAIR_HEX; + encoding = "hex"; + } + else { + how |= RSPAMD_KEYPAIR_BASE32; + encoding = "base32"; + } + + if (flags & RSPAMD_KEYPAIR_DUMP_FLATTENED) { + ucl_out = ucl_object_typed_new(UCL_OBJECT); + elt = ucl_out; + } + else { + ucl_out = ucl_object_typed_new(UCL_OBJECT); + elt = ucl_object_typed_new(UCL_OBJECT); + ucl_object_insert_key(ucl_out, elt, "keypair", 0, false); + } + + + /* pubkey part */ + keypair_out = rspamd_keypair_print(kp, + RSPAMD_KEYPAIR_PUBKEY | how); + ucl_object_insert_key(elt, + ucl_object_fromlstring(keypair_out->str, keypair_out->len), + "pubkey", 0, false); + 
g_string_free(keypair_out, TRUE); + + if (!(flags & RSPAMD_KEYPAIR_DUMP_NO_SECRET)) { + /* privkey part */ + keypair_out = rspamd_keypair_print(kp, + RSPAMD_KEYPAIR_PRIVKEY | how); + ucl_object_insert_key(elt, + ucl_object_fromlstring(keypair_out->str, keypair_out->len), + "privkey", 0, false); + g_string_free(keypair_out, TRUE); + } + + keypair_out = rspamd_keypair_print(kp, + RSPAMD_KEYPAIR_ID | how); + ucl_object_insert_key(elt, + ucl_object_fromlstring(keypair_out->str, keypair_out->len), + "id", 0, false); + g_string_free(keypair_out, TRUE); + + ucl_object_insert_key(elt, + ucl_object_fromstring(encoding), + "encoding", 0, false); + + ucl_object_insert_key(elt, + ucl_object_fromstring( + kp->alg == RSPAMD_CRYPTOBOX_MODE_NIST ? "nistp256" : "curve25519"), + "algorithm", 0, false); + + ucl_object_insert_key(elt, + ucl_object_fromstring( + kp->type == RSPAMD_KEYPAIR_KEX ? "kex" : "sign"), + "type", 0, false); + + if (kp->extensions) { + ucl_object_insert_key(elt, ucl_object_copy(kp->extensions), + "extensions", 0, false); + } + + return ucl_out; +} + +gboolean +rspamd_keypair_decrypt(struct rspamd_cryptobox_keypair *kp, + const guchar *in, gsize inlen, + guchar **out, gsize *outlen, + GError **err) +{ + const guchar *nonce, *mac, *data, *pubkey; + + g_assert(kp != NULL); + g_assert(in != NULL); + + if (kp->type != RSPAMD_KEYPAIR_KEX) { + g_set_error(err, rspamd_keypair_quark(), EINVAL, + "invalid keypair type"); + + return FALSE; + } + + if (inlen < sizeof(encrypted_magic) + rspamd_cryptobox_pk_bytes(kp->alg) + + rspamd_cryptobox_mac_bytes(kp->alg) + + rspamd_cryptobox_nonce_bytes(kp->alg)) { + g_set_error(err, rspamd_keypair_quark(), E2BIG, "invalid size: too small"); + + return FALSE; + } + + if (memcmp(in, encrypted_magic, sizeof(encrypted_magic)) != 0) { + g_set_error(err, rspamd_keypair_quark(), EINVAL, + "invalid magic"); + + return FALSE; + } + + /* Set pointers */ + pubkey = in + sizeof(encrypted_magic); + mac = pubkey + 
rspamd_cryptobox_pk_bytes(kp->alg); + nonce = mac + rspamd_cryptobox_mac_bytes(kp->alg); + data = nonce + rspamd_cryptobox_nonce_bytes(kp->alg); + + if (data - in >= inlen) { + g_set_error(err, rspamd_keypair_quark(), E2BIG, "invalid size: too small"); + + return FALSE; + } + + inlen -= data - in; + + /* Allocate memory for output */ + *out = g_malloc(inlen); + memcpy(*out, data, inlen); + + if (!rspamd_cryptobox_decrypt_inplace(*out, inlen, nonce, pubkey, + rspamd_keypair_component(kp, RSPAMD_KEYPAIR_COMPONENT_SK, NULL), + mac, kp->alg)) { + g_set_error(err, rspamd_keypair_quark(), EPERM, "verification failed"); + g_free(*out); + + return FALSE; + } + + if (outlen) { + *outlen = inlen; + } + + return TRUE; +} + +gboolean +rspamd_keypair_encrypt(struct rspamd_cryptobox_keypair *kp, + const guchar *in, gsize inlen, + guchar **out, gsize *outlen, + GError **err) +{ + guchar *nonce, *mac, *data, *pubkey; + struct rspamd_cryptobox_keypair *local; + gsize olen; + + g_assert(kp != NULL); + g_assert(in != NULL); + + if (kp->type != RSPAMD_KEYPAIR_KEX) { + g_set_error(err, rspamd_keypair_quark(), EINVAL, + "invalid keypair type"); + + return FALSE; + } + + local = rspamd_keypair_new(kp->type, kp->alg); + + olen = inlen + sizeof(encrypted_magic) + + rspamd_cryptobox_pk_bytes(kp->alg) + + rspamd_cryptobox_mac_bytes(kp->alg) + + rspamd_cryptobox_nonce_bytes(kp->alg); + *out = g_malloc(olen); + memcpy(*out, encrypted_magic, sizeof(encrypted_magic)); + pubkey = *out + sizeof(encrypted_magic); + mac = pubkey + rspamd_cryptobox_pk_bytes(kp->alg); + nonce = mac + rspamd_cryptobox_mac_bytes(kp->alg); + data = nonce + rspamd_cryptobox_nonce_bytes(kp->alg); + + ottery_rand_bytes(nonce, rspamd_cryptobox_nonce_bytes(kp->alg)); + memcpy(data, in, inlen); + memcpy(pubkey, rspamd_keypair_component(kp, RSPAMD_KEYPAIR_COMPONENT_PK, NULL), + rspamd_cryptobox_pk_bytes(kp->alg)); + rspamd_cryptobox_encrypt_inplace(data, inlen, nonce, pubkey, + rspamd_keypair_component(local, 
RSPAMD_KEYPAIR_COMPONENT_SK, NULL), + mac, kp->alg); + rspamd_keypair_unref(local); + + if (outlen) { + *outlen = olen; + } + + return TRUE; +} + +gboolean +rspamd_pubkey_encrypt(struct rspamd_cryptobox_pubkey *pk, + const guchar *in, gsize inlen, + guchar **out, gsize *outlen, + GError **err) +{ + guchar *nonce, *mac, *data, *pubkey; + struct rspamd_cryptobox_keypair *local; + gsize olen; + + g_assert(pk != NULL); + g_assert(in != NULL); + + if (pk->type != RSPAMD_KEYPAIR_KEX) { + g_set_error(err, rspamd_keypair_quark(), EINVAL, + "invalid pubkey type"); + + return FALSE; + } + + local = rspamd_keypair_new(pk->type, pk->alg); + + olen = inlen + sizeof(encrypted_magic) + + rspamd_cryptobox_pk_bytes(pk->alg) + + rspamd_cryptobox_mac_bytes(pk->alg) + + rspamd_cryptobox_nonce_bytes(pk->alg); + *out = g_malloc(olen); + memcpy(*out, encrypted_magic, sizeof(encrypted_magic)); + pubkey = *out + sizeof(encrypted_magic); + mac = pubkey + rspamd_cryptobox_pk_bytes(pk->alg); + nonce = mac + rspamd_cryptobox_mac_bytes(pk->alg); + data = nonce + rspamd_cryptobox_nonce_bytes(pk->alg); + + ottery_rand_bytes(nonce, rspamd_cryptobox_nonce_bytes(pk->alg)); + memcpy(data, in, inlen); + memcpy(pubkey, rspamd_pubkey_get_pk(pk, NULL), + rspamd_cryptobox_pk_bytes(pk->alg)); + rspamd_cryptobox_encrypt_inplace(data, inlen, nonce, pubkey, + rspamd_keypair_component(local, RSPAMD_KEYPAIR_COMPONENT_SK, NULL), + mac, pk->alg); + rspamd_keypair_unref(local); + + if (outlen) { + *outlen = olen; + } + + return TRUE; +}
\ No newline at end of file diff --git a/src/libcryptobox/keypair.h b/src/libcryptobox/keypair.h new file mode 100644 index 0000000..64461b7 --- /dev/null +++ b/src/libcryptobox/keypair.h @@ -0,0 +1,317 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBCRYPTOBOX_KEYPAIR_H_ +#define SRC_LIBCRYPTOBOX_KEYPAIR_H_ + +#include "config.h" +#include "cryptobox.h" +#include "ucl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Keypair type + */ +enum rspamd_cryptobox_keypair_type { + RSPAMD_KEYPAIR_KEX = 0, + RSPAMD_KEYPAIR_SIGN +}; + +extern const guchar encrypted_magic[7]; + +/** + * Opaque structure for the full (public + private) keypair + */ +struct rspamd_cryptobox_keypair; +/** + * Opaque structure for public only keypair + */ +struct rspamd_cryptobox_pubkey; + +/** + * Creates new full keypair + * @param type type of the keypair + * @param alg algorithm for the keypair + * @return fresh keypair generated + */ +struct rspamd_cryptobox_keypair *rspamd_keypair_new( + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg); + +/** + * Increase refcount for the specific keypair + * @param kp + * @return + */ +struct rspamd_cryptobox_keypair *rspamd_keypair_ref( + struct rspamd_cryptobox_keypair *kp); + +/** + * Decrease refcount for the specific keypair (or destroy when refcount == 0) + * @param kp + */ +void rspamd_keypair_unref(struct rspamd_cryptobox_keypair *kp); + +/** 
+ * Increase refcount for the specific pubkey + * @param kp + * @return + */ +struct rspamd_cryptobox_pubkey *rspamd_pubkey_ref( + struct rspamd_cryptobox_pubkey *kp); + +/** + * Load pubkey from base32 string + * @param b32 input string + * @param type type of key (signing or kex) + * @param alg algorithm of the key (nist or curve25519) + * @return new pubkey or NULL in case of error + */ +struct rspamd_cryptobox_pubkey *rspamd_pubkey_from_base32(const gchar *b32, + gsize len, + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg); + +/** + * Load pubkey from hex string + * @param hex input string + * @param type type of key (signing or kex) + * @param alg algorithm of the key (nist or curve25519) + * @return new pubkey or NULL in case of error + */ +struct rspamd_cryptobox_pubkey *rspamd_pubkey_from_hex(const gchar *hex, + gsize len, + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg); + +/** + * Load pubkey from raw chunk string + * @param hex input data + * @param type type of key (signing or kex) + * @param alg algorithm of the key (nist or curve25519) + * @return new pubkey or NULL in case of error + */ +struct rspamd_cryptobox_pubkey *rspamd_pubkey_from_bin(const guchar *raw, + gsize len, + enum rspamd_cryptobox_keypair_type type, + enum rspamd_cryptobox_mode alg); + + +/** + * Decrease refcount for the specific pubkey (or destroy when refcount == 0) + * @param kp + */ +void rspamd_pubkey_unref(struct rspamd_cryptobox_pubkey *kp); + +/** + * Get type of keypair + */ +enum rspamd_cryptobox_keypair_type rspamd_keypair_type( + struct rspamd_cryptobox_keypair *kp); + +/** + * Get type of pubkey + */ +enum rspamd_cryptobox_keypair_type rspamd_pubkey_type( + struct rspamd_cryptobox_pubkey *p); + +/** + * Get algorithm of keypair + */ +enum rspamd_cryptobox_mode rspamd_keypair_alg(struct rspamd_cryptobox_keypair *kp); + +/** + * Get algorithm of pubkey + */ +enum rspamd_cryptobox_mode rspamd_pubkey_alg(struct 
rspamd_cryptobox_pubkey *p); + +/** + * Get cached NM for this specific pubkey + * @param p + * @return + */ +const guchar *rspamd_pubkey_get_nm(struct rspamd_cryptobox_pubkey *p, + struct rspamd_cryptobox_keypair *kp); + +/** + * Calculate and store nm value for the specified local key (performs ECDH) + * @param p + * @return + */ +const guchar *rspamd_pubkey_calculate_nm(struct rspamd_cryptobox_pubkey *p, + struct rspamd_cryptobox_keypair *kp); + +/** + * Get raw public key id for a specified keypair (rspamd_cryptobox_HASHBYTES) + * @param kp + * @return + */ +const guchar *rspamd_keypair_get_id(struct rspamd_cryptobox_keypair *kp); + +/** + * Returns keypair extensions if any + * @param kp + * @return + */ +const ucl_object_t *rspamd_keypair_get_extensions(struct rspamd_cryptobox_keypair *kp); + +/** + * Get raw public key id for a specified key (rspamd_cryptobox_HASHBYTES) + * @param kp + * @return + */ +const guchar *rspamd_pubkey_get_id(struct rspamd_cryptobox_pubkey *pk); + +/** + * Get raw public key from pubkey opaque structure + * @param pk + * @param len + * @return + */ +const guchar *rspamd_pubkey_get_pk(struct rspamd_cryptobox_pubkey *pk, + guint *len); + +/** Short ID characters count */ +#define RSPAMD_KEYPAIR_SHORT_ID_LEN 5 +/** Print pubkey */ +#define RSPAMD_KEYPAIR_PUBKEY 0x1 +/** Print secret key */ +#define RSPAMD_KEYPAIR_PRIVKEY 0x2 +/** Print key id */ +#define RSPAMD_KEYPAIR_ID 0x4 +/** Print short key id */ +#define RSPAMD_KEYPAIR_ID_SHORT 0x8 +/** Encode output with base 32 */ +#define RSPAMD_KEYPAIR_BASE32 0x10 +/** Human readable output */ +#define RSPAMD_KEYPAIR_HUMAN 0x20 +#define RSPAMD_KEYPAIR_HEX 0x40 + +/** + * Print keypair encoding it if needed + * @param key key to print + * @param how flags that specifies printing behaviour + * @return newly allocated string with keypair + */ +GString *rspamd_keypair_print(struct rspamd_cryptobox_keypair *kp, + guint how); + +/** + * Print pubkey encoding it if needed + * @param key key to 
print + * @param how flags that specifies printing behaviour + * @return newly allocated string with keypair + */ +GString *rspamd_pubkey_print(struct rspamd_cryptobox_pubkey *pk, + guint how); + +/** Get keypair pubkey ID */ +#define RSPAMD_KEYPAIR_COMPONENT_ID 0 +/** Get keypair public key */ +#define RSPAMD_KEYPAIR_COMPONENT_PK 1 +/** Get keypair private key */ +#define RSPAMD_KEYPAIR_COMPONENT_SK 2 + +/** + * Get specific component of a keypair + * @param kp keypair + * @param ncomp component number + * @param len length of input + * @return raw content of the component + */ +const guchar *rspamd_keypair_component(struct rspamd_cryptobox_keypair *kp, + guint ncomp, guint *len); + +/** + * Create a new keypair from ucl object + * @param obj object to load + * @return new structure or NULL if an object is invalid + */ +struct rspamd_cryptobox_keypair *rspamd_keypair_from_ucl(const ucl_object_t *obj); + + +enum rspamd_keypair_dump_flags { + RSPAMD_KEYPAIR_DUMP_DEFAULT = 0, + RSPAMD_KEYPAIR_DUMP_HEX = 1u << 0u, + RSPAMD_KEYPAIR_DUMP_NO_SECRET = 1u << 1u, + RSPAMD_KEYPAIR_DUMP_FLATTENED = 1u << 2u, +}; + +/** + * Converts keypair to ucl object + * @param kp + * @return + */ +ucl_object_t *rspamd_keypair_to_ucl(struct rspamd_cryptobox_keypair *kp, + enum rspamd_keypair_dump_flags flags); + + +/** + * Decrypts data using keypair and a pubkey stored in in, in must start from + * `encrypted_magic` constant + * @param kp keypair + * @param in raw input + * @param inlen input length + * @param out output (allocated internally using g_malloc) + * @param outlen output size + * @return TRUE if decryption is completed, out must be freed in this case + */ +gboolean rspamd_keypair_decrypt(struct rspamd_cryptobox_keypair *kp, + const guchar *in, gsize inlen, + guchar **out, gsize *outlen, + GError **err); + +/** + * Encrypts data usign specific keypair. 
+ * This method actually generates ephemeral local keypair, use public key from + * the remote keypair and encrypts data + * @param kp keypair + * @param in raw input + * @param inlen input length + * @param out output (allocated internally using g_malloc) + * @param outlen output size + * @param err pointer to error + * @return TRUE if encryption has been completed, out must be freed in this case + */ +gboolean rspamd_keypair_encrypt(struct rspamd_cryptobox_keypair *kp, + const guchar *in, gsize inlen, + guchar **out, gsize *outlen, + GError **err); + +/** + * Encrypts data usign specific pubkey (must have KEX type). + * This method actually generates ephemeral local keypair, use public key from + * the remote keypair and encrypts data + * @param kp keypair + * @param in raw input + * @param inlen input length + * @param out output (allocated internally using g_malloc) + * @param outlen output size + * @param err pointer to error + * @return TRUE if encryption has been completed, out must be freed in this case + */ +gboolean rspamd_pubkey_encrypt(struct rspamd_cryptobox_pubkey *pk, + const guchar *in, gsize inlen, + guchar **out, gsize *outlen, + GError **err); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBCRYPTOBOX_KEYPAIR_H_ */ diff --git a/src/libcryptobox/keypair_private.h b/src/libcryptobox/keypair_private.h new file mode 100644 index 0000000..16e17e0 --- /dev/null +++ b/src/libcryptobox/keypair_private.h @@ -0,0 +1,143 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef KEYPAIR_PRIVATE_H_ +#define KEYPAIR_PRIVATE_H_ + +#include "config.h" +#include "ref.h" +#include "cryptobox.h" + +#ifdef __cplusplus +extern "C" { +#endif +/* + * KEX cached data + */ +struct rspamd_cryptobox_nm { + guchar nm[rspamd_cryptobox_MAX_NMBYTES]; + guint64 sk_id; /* Used to store secret key id */ + ref_entry_t ref; +}; + +/* + * Generic keypair + */ +struct rspamd_cryptobox_keypair { + guchar id[rspamd_cryptobox_HASHBYTES]; + enum rspamd_cryptobox_keypair_type type; + enum rspamd_cryptobox_mode alg; + ucl_object_t *extensions; + ref_entry_t ref; +}; + +/* + * NIST p256 ecdh keypair + */ +#define RSPAMD_CRYPTOBOX_KEYPAIR_NIST(x) ((struct rspamd_cryptobox_keypair_nist *) (x)) +struct rspamd_cryptobox_keypair_nist { + struct rspamd_cryptobox_keypair parent; + guchar sk[32]; + guchar pk[65]; +}; + +/* + * Curve25519 ecdh keypair + */ +#define RSPAMD_CRYPTOBOX_KEYPAIR_25519(x) ((struct rspamd_cryptobox_keypair_25519 *) (x)) +struct rspamd_cryptobox_keypair_25519 { + struct rspamd_cryptobox_keypair parent; + guchar sk[32]; + guchar pk[32]; +}; + +/* + * NIST p256 ecdsa keypair + */ +#define RSPAMD_CRYPTOBOX_KEYPAIR_SIG_NIST(x) ((struct rspamd_cryptobox_keypair_sig_nist *) (x)) +struct rspamd_cryptobox_keypair_sig_nist { + struct rspamd_cryptobox_keypair parent; + guchar sk[32]; + guchar pk[65]; +}; + +/* + * Ed25519 keypair + */ +#define RSPAMD_CRYPTOBOX_KEYPAIR_SIG_25519(x) ((struct rspamd_cryptobox_keypair_sig_25519 *) (x)) +struct rspamd_cryptobox_keypair_sig_25519 { + struct rspamd_cryptobox_keypair parent; + guchar sk[64]; + guchar pk[32]; +}; + +/* + * Public component of the keypair + */ +struct rspamd_cryptobox_pubkey { + guchar id[rspamd_cryptobox_HASHBYTES]; + struct rspamd_cryptobox_nm *nm; + enum rspamd_cryptobox_keypair_type type; + enum rspamd_cryptobox_mode alg; + ref_entry_t ref; +}; + +/* + * Public p256 ecdh + */ +#define 
RSPAMD_CRYPTOBOX_PUBKEY_NIST(x) ((struct rspamd_cryptobox_pubkey_nist *) (x)) +struct rspamd_cryptobox_pubkey_nist { + struct rspamd_cryptobox_pubkey parent; + guchar pk[65]; +}; + +/* + * Public curve25519 ecdh + */ +#define RSPAMD_CRYPTOBOX_PUBKEY_25519(x) ((struct rspamd_cryptobox_pubkey_25519 *) (x)) +struct rspamd_cryptobox_pubkey_25519 { + struct rspamd_cryptobox_pubkey parent; + guchar pk[32]; +}; + +/* + * Public p256 ecdsa + */ +#define RSPAMD_CRYPTOBOX_PUBKEY_SIG_NIST(x) ((struct rspamd_cryptobox_pubkey_sig_nist *) (x)) +struct rspamd_cryptobox_pubkey_sig_nist { + struct rspamd_cryptobox_pubkey parent; + guchar pk[65]; +}; + +/* + * Public ed25519 + */ +#define RSPAMD_CRYPTOBOX_PUBKEY_SIG_25519(x) ((struct rspamd_cryptobox_pubkey_sig_25519 *) (x)) +struct rspamd_cryptobox_pubkey_sig_25519 { + struct rspamd_cryptobox_pubkey parent; + guchar pk[32]; +}; + +void rspamd_cryptobox_nm_dtor(struct rspamd_cryptobox_nm *nm); + +void rspamd_cryptobox_keypair_dtor(struct rspamd_cryptobox_keypair *kp); + +void rspamd_cryptobox_pubkey_dtor(struct rspamd_cryptobox_pubkey *p); + +#ifdef __cplusplus +} +#endif + +#endif /* KEYPAIR_PRIVATE_H_ */ diff --git a/src/libcryptobox/keypairs_cache.c b/src/libcryptobox/keypairs_cache.c new file mode 100644 index 0000000..0616bb9 --- /dev/null +++ b/src/libcryptobox/keypairs_cache.c @@ -0,0 +1,141 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "config.h" +#include "keypairs_cache.h" +#include "keypair_private.h" +#include "libutil/util.h" +#include "hash.h" + +struct rspamd_keypair_elt { + struct rspamd_cryptobox_nm *nm; + guchar pair[rspamd_cryptobox_HASHBYTES * 2]; +}; + +struct rspamd_keypair_cache { + rspamd_lru_hash_t *hash; +}; + +static void +rspamd_keypair_destroy(gpointer ptr) +{ + struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *) ptr; + + REF_RELEASE(elt->nm); + g_free(elt); +} + +static guint +rspamd_keypair_hash(gconstpointer ptr) +{ + struct rspamd_keypair_elt *elt = (struct rspamd_keypair_elt *) ptr; + + return rspamd_cryptobox_fast_hash(elt->pair, sizeof(elt->pair), + rspamd_hash_seed()); +} + +static gboolean +rspamd_keypair_equal(gconstpointer p1, gconstpointer p2) +{ + struct rspamd_keypair_elt *e1 = (struct rspamd_keypair_elt *) p1, + *e2 = (struct rspamd_keypair_elt *) p2; + + return memcmp(e1->pair, e2->pair, sizeof(e1->pair)) == 0; +} + +struct rspamd_keypair_cache * +rspamd_keypair_cache_new(guint max_items) +{ + struct rspamd_keypair_cache *c; + + g_assert(max_items > 0); + + c = g_malloc0(sizeof(*c)); + c->hash = rspamd_lru_hash_new_full(max_items, NULL, + rspamd_keypair_destroy, rspamd_keypair_hash, rspamd_keypair_equal); + + return c; +} + +void rspamd_keypair_cache_process(struct rspamd_keypair_cache *c, + struct rspamd_cryptobox_keypair *lk, + struct rspamd_cryptobox_pubkey *rk) +{ + struct rspamd_keypair_elt search, *new; + + g_assert(lk != NULL); + g_assert(rk != NULL); + g_assert(rk->alg == lk->alg); + g_assert(rk->type == lk->type); + g_assert(rk->type == RSPAMD_KEYPAIR_KEX); + + memset(&search, 0, sizeof(search)); + memcpy(search.pair, rk->id, rspamd_cryptobox_HASHBYTES); + memcpy(&search.pair[rspamd_cryptobox_HASHBYTES], lk->id, + rspamd_cryptobox_HASHBYTES); + new = rspamd_lru_hash_lookup(c->hash, &search, time(NULL)); + + if (rk->nm) { + REF_RELEASE(rk->nm); + rk->nm = NULL; + } + + if (new == NULL) { + new = g_malloc0(sizeof(*new)); + + if 
(posix_memalign((void **) &new->nm, 32, sizeof(*new->nm)) != 0) { + abort(); + } + + REF_INIT_RETAIN(new->nm, rspamd_cryptobox_nm_dtor); + + memcpy(new->pair, rk->id, rspamd_cryptobox_HASHBYTES); + memcpy(&new->pair[rspamd_cryptobox_HASHBYTES], lk->id, + rspamd_cryptobox_HASHBYTES); + memcpy(&new->nm->sk_id, lk->id, sizeof(guint64)); + + if (rk->alg == RSPAMD_CRYPTOBOX_MODE_25519) { + struct rspamd_cryptobox_pubkey_25519 *rk_25519 = + RSPAMD_CRYPTOBOX_PUBKEY_25519(rk); + struct rspamd_cryptobox_keypair_25519 *sk_25519 = + RSPAMD_CRYPTOBOX_KEYPAIR_25519(lk); + + rspamd_cryptobox_nm(new->nm->nm, rk_25519->pk, sk_25519->sk, rk->alg); + } + else { + struct rspamd_cryptobox_pubkey_nist *rk_nist = + RSPAMD_CRYPTOBOX_PUBKEY_NIST(rk); + struct rspamd_cryptobox_keypair_nist *sk_nist = + RSPAMD_CRYPTOBOX_KEYPAIR_NIST(lk); + + rspamd_cryptobox_nm(new->nm->nm, rk_nist->pk, sk_nist->sk, rk->alg); + } + + rspamd_lru_hash_insert(c->hash, new, new, time(NULL), -1); + } + + g_assert(new != NULL); + + rk->nm = new->nm; + REF_RETAIN(rk->nm); +} + +void rspamd_keypair_cache_destroy(struct rspamd_keypair_cache *c) +{ + if (c != NULL) { + rspamd_lru_hash_destroy(c->hash); + g_free(c); + } +} diff --git a/src/libcryptobox/keypairs_cache.h b/src/libcryptobox/keypairs_cache.h new file mode 100644 index 0000000..96e356a --- /dev/null +++ b/src/libcryptobox/keypairs_cache.h @@ -0,0 +1,57 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef KEYPAIRS_CACHE_H_ +#define KEYPAIRS_CACHE_H_ + +#include "config.h" +#include "keypair.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_keypair_cache; + +/** + * Create new keypair cache of the specified size + * @param max_items defines maximum count of elements in the cache + * @return new cache + */ +struct rspamd_keypair_cache *rspamd_keypair_cache_new(guint max_items); + + +/** + * Process local and remote keypair setting beforenm value as appropriate + * @param c cache of keypairs + * @param lk local key + * @param rk remote key + */ +void rspamd_keypair_cache_process(struct rspamd_keypair_cache *c, + struct rspamd_cryptobox_keypair *lk, + struct rspamd_cryptobox_pubkey *rk); + +/** + * Destroy old keypair cache + * @param c cache object + */ +void rspamd_keypair_cache_destroy(struct rspamd_keypair_cache *c); + +#ifdef __cplusplus +} +#endif + +#endif /* KEYPAIRS_CACHE_H_ */ diff --git a/src/libcryptobox/macro.S b/src/libcryptobox/macro.S new file mode 100644 index 0000000..f213f15 --- /dev/null +++ b/src/libcryptobox/macro.S @@ -0,0 +1,176 @@ +#include "platform_config.h" + +#if !defined(HAVE_SLASHMACRO) && !defined(HAVE_DOLLARMACRO) + #error Unknown gnu as macro parameter convention! 
Run ./configure +#endif + +#if defined(__MACH__) + .macro FN name + #if defined(HAVE_SLASHMACRO) + \name: + _\name: + #elif defined(HAVE_DOLLARMACRO) + $0: + _$0: + #endif + .endm + + .macro FN_EXT name, args, xmmused + #if defined(HAVE_SLASHMACRO) + FN \name + #elif defined(HAVE_DOLLARMACRO) + FN $0 + #endif + .endm + + .macro FN_END name + .endm + + .macro HIDDEN name + #if defined(HAVE_AS_PRIVATE_EXTERN) + #if defined(HAVE_SLASHMACRO) + .private_extern \name + .private_extern _\name + #elif defined(HAVE_DOLLARMACRO) + .private_extern $0 + .private_extern _$0 + #endif + #endif + .endm +#else + .macro FN name + \name: + _\name: + .endm + + .macro FN_EXT name, args, xmmused + FN \name + .endm + + .macro FN_END name + .size \name, .-\name + .size _\name, .-_\name + .type \name, @function + .type _\name, @function + .endm + + .macro HIDDEN name + #if defined(HAVE_AS_HIDDEN) + .hidden \name + .hidden _\name + #endif + .endm + + /* set NX for stack */ + .section .note.GNU-stack,"",@progbits +#endif +#if defined(__MACH__) + .macro SECTION_TEXT + .section __TEXT,__text,regular + .endm + + .macro SECTION_RODATA + .section __TEXT,__text,regular + .endm +#else + /* put everything in the code segment to simplify things */ + .macro SECTION_TEXT + .text + .endm + + .macro SECTION_RODATA + .text + .endm +#endif + +/* declare a global function */ +.macro GLOBAL name +#if defined(HAVE_SLASHMACRO) + .globl \name + .globl _\name +#elif defined(HAVE_DOLLARMACRO) + .globl $0 + .globl _$0 +#endif +.endm + +.macro FN_LOCAL_PREFIX name +#if defined(HAVE_SLASHMACRO) + FN LOCAL_PREFIX(\name) +#elif defined(HAVE_DOLLARMACRO) + FN LOCAL_PREFIX($0) +#endif +.endm + +.macro FN_EXT_LOCAL_PREFIX name, args, xmmused +#if defined(HAVE_SLASHMACRO) + FN_EXT LOCAL_PREFIX(\name), \args, \xmmused +#elif defined(HAVE_DOLLARMACRO) + FN_EXT LOCAL_PREFIX($0), $1, $2 +#endif +.endm + +.macro FN_END_LOCAL_PREFIX name +#if defined(HAVE_SLASHMACRO) + FN_END LOCAL_PREFIX(\name) +#elif defined(HAVE_DOLLARMACRO) 
+ FN_END LOCAL_PREFIX($0) +#endif +.endm + +.macro GLOBAL_LOCAL_PREFIX name +#if defined(HAVE_SLASHMACRO) + GLOBAL LOCAL_PREFIX(\name) + HIDDEN LOCAL_PREFIX(\name) +#elif defined(HAVE_DOLLARMACRO) + GLOBAL LOCAL_PREFIX($0) + HIDDEN LOCAL_PREFIX($0) +#endif +.endm + +.macro GLOBAL_HIDDEN_FN name +#if defined(HAVE_SLASHMACRO) + GLOBAL \name + HIDDEN \name + FN \name +#elif defined(HAVE_DOLLARMACRO) + GLOBAL $0 + HIDDEN $0 + FN $0 +#endif +.endm + +.macro GLOBAL_HIDDEN_FN_EXT name, args, xmmused +#if defined(HAVE_SLASHMACRO) + GLOBAL \name + HIDDEN \name + FN_EXT \name, \args, \xmmused +#elif defined(HAVE_DOLLARMACRO) + GLOBAL $0 + HIDDEN $0 + FN_EXT $0, $1, $2 +#endif +.endm + +/* pic support */ +.macro LOAD_VAR_PIC var, reg +#if !defined(__LP64__) + #if defined(HAVE_SLASHMACRO) + call 1f + 1: + popl \reg + leal \var - 1b(\reg), \reg + #elif defined(HAVE_DOLLARMACRO) + call 1f + 1: + popl $1 + leal $0 - 1b($1), $1 + #endif +#else + #if defined(HAVE_SLASHMACRO) + leaq \var(%rip), \reg + #elif defined(HAVE_DOLLARMACRO) + leaq $0(%rip), $1 + #endif +#endif +.endm diff --git a/src/libcryptobox/platform_config.h.in b/src/libcryptobox/platform_config.h.in new file mode 100644 index 0000000..7b7d17d --- /dev/null +++ b/src/libcryptobox/platform_config.h.in @@ -0,0 +1,16 @@ +#ifndef PLATFORM_H_CONFIG +#define PLATFORM_H_CONFIG + +#define ARCH "${ARCH}" +#define CMAKE_ARCH_${ARCH} 1 +#cmakedefine HAVE_AVX2 1 +#cmakedefine HAVE_AVX 1 +#cmakedefine HAVE_SSE2 1 +#cmakedefine HAVE_SSE41 1 +#cmakedefine HAVE_SSE42 1 +#cmakedefine HAVE_SSE3 1 +#cmakedefine HAVE_SSSE3 1 +#cmakedefine HAVE_SLASHMACRO 1 +#cmakedefine HAVE_DOLLARMACRO 1 + +#endif
\ No newline at end of file |