commit 133a45c109da5310add55824db21af5239951f93 (patch)
tree ba6ac4c0a950a0dda56451944315d66409923918 /src/libcryptobox/base64
parent Initial commit. (diff)
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
Adding upstream version 3.8.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libcryptobox/base64')
-rw-r--r--  src/libcryptobox/base64/avx2.c   | 287
-rw-r--r--  src/libcryptobox/base64/base64.c | 445
-rw-r--r--  src/libcryptobox/base64/base64.h |  31
-rw-r--r--  src/libcryptobox/base64/ref.c    | 241
-rw-r--r--  src/libcryptobox/base64/sse42.c  | 268
5 files changed, 1272 insertions, 0 deletions
diff --git a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c
new file mode 100644
index 0000000..38abffc
--- /dev/null
+++ b/src/libcryptobox/base64/avx2.c
@@ -0,0 +1,287 @@
+/*-
+ * Copyright 2018 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2018, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#endif
+#ifndef __SSE2__
+#define __SSE2__
+#endif
+#ifndef __SSE__
+#define __SSE__
+#endif
+#ifndef __SSE4_2__
+#define __SSE4_2__
+#endif
+#ifndef __SSE4_1__
+#define __SSE4_1__
+#endif
+#ifndef __SSSE3__
+#define __SSSE3__
+#endif
+#ifndef __AVX__
+#define __AVX__
+#endif
+#ifndef __AVX2__
+#define __AVX2__
+#endif
+
+#include <immintrin.h>
+
+#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
+#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
+#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n))
+#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) -1))
+
+static inline __m256i
+dec_reshuffle(__m256i in) __attribute__((__target__("avx2")));
+
+static inline __m256i
+dec_reshuffle(__m256i in)
+{
+ // Input bits, lower lane shown; upper-case letters are the most significant bits, lower-case the least significant:
+ // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+ // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+ // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+ // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+
+ const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
+ // 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+ // 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+ // 0000eeee FFffffff 0000DDDD DDddEEEE
+ // 0000bbbb CCcccccc 0000AAAA AAaaBBBB
+
+ __m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
+ // 00000000 JJJJJJjj KKKKkkkk LLllllll
+ // 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+ // 00000000 DDDDDDdd EEEEeeee FFffffff
+ // 00000000 AAAAAAaa BBBBbbbb CCcccccc
+
+ // Pack bytes together in each lane:
+ out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
+ // 00000000 00000000 00000000 00000000
+ // LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+ // HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+ // DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
+
+ // Pack lanes
+ return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
+}
+
+
+#define INNER_LOOP_AVX2 \
+ while (inlen >= 45) { \
+ __m256i str = _mm256_loadu_si256((__m256i *) c); \
+ const __m256i lut_lo = _mm256_setr_epi8( \
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
+ const __m256i lut_hi = _mm256_setr_epi8( \
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
+ const __m256i lut_roll = _mm256_setr_epi8( \
+ 0, 16, 19, 4, -65, -65, -71, -71, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 16, 19, 4, -65, -65, -71, -71, \
+ 0, 0, 0, 0, 0, 0, 0, 0); \
+ const __m256i mask_2F = _mm256_set1_epi8(0x2f); \
+ const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \
+ const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \
+ const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \
+ const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \
+ const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \
+ const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \
+ if (!_mm256_testz_si256(lo, hi)) { \
+ seen_error = true; \
+ break; \
+ } \
+ str = _mm256_add_epi8(str, roll); \
+ str = dec_reshuffle(str); \
+ _mm256_storeu_si256((__m256i *) o, str); \
+ c += 32; \
+ o += 24; \
+ outl += 24; \
+ inlen -= 32; \
+ }
+
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
+{
+ ssize_t ret = 0;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
+ uint8_t q, carry;
+ size_t outl = 0;
+ size_t leftover = 0;
+ bool seen_error = false;
+
+repeat:
+ switch (leftover) {
+ for (;;) {
+ case 0:
+ if (G_LIKELY(!seen_error)) {
+ INNER_LOOP_AVX2
+ }
+
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ carry = q << 2;
+ leftover++;
+
+ case 1:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ *o++ = carry | (q >> 4);
+ carry = q << 4;
+ leftover++;
+ outl++;
+
+ case 2:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ leftover++;
+
+ if (q == 254) {
+ if (inlen-- != 0) {
+ leftover = 0;
+ q = base64_table_dec[*c++];
+ ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+ break;
+ }
+ else {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ leftover--;
+ }
+ /* If we get here, there was an error: */
+ break;
+ }
+ *o++ = carry | (q >> 2);
+ carry = q << 6;
+ leftover++;
+ outl++;
+
+ case 3:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ /*
+ * When q == 254, the input char is '=' (padding): return 1 (success) at EOF.
+ * When q == 255, the input char is invalid: return 0 (error) at EOF.
+ */
+ if (q == 254 && inlen == 0) {
+ ret = 1;
+ leftover = 0;
+ }
+ else {
+ ret = 0;
+ }
+
+ break;
+ }
+
+ *o++ = carry | q;
+ carry = 0;
+ leftover = 0;
+ outl++;
+ }
+ }
+
+ if (!ret && inlen > 0) {
+ /* Skip to the next valid character in input */
+ while (inlen > 0 && base64_table_dec[*c] >= 254) {
+ c++;
+ inlen--;
+ }
+
+ if (inlen > 0) {
+ seen_error = false;
+ goto repeat;
+ }
+ }
+
+ *outlen = outl;
+
+ return ret;
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC pop_options
+#endif
+#endif
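A note on the validity test in INNER_LOOP_AVX2 above: lut_lo is indexed by the low nibble of each input byte and lut_hi by the high nibble, and the two table entries share a set bit exactly when the byte is not a valid base64 character, so _mm256_testz_si256(lo, hi) fails only for blocks containing invalid input. (The vector code masks with 0x2F rather than 0x0F merely because _mm256_shuffle_epi8 ignores index bits 4-6.) A minimal scalar sketch of the same classification, reusing the table values from the diff; the helper names are illustrative, not part of the patch:

/* A byte is a valid base64 character iff its two LUT entries share no bit. */
static const unsigned char b64_lut_lo[16] = {
    0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
    0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A};
static const unsigned char b64_lut_hi[16] = {
    0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10};

static int b64_byte_is_valid(unsigned char c)
{
    /* 'A' = 0x41: lut_lo[1] = 0x11, lut_hi[4] = 0x04, 0x11 & 0x04 == 0 -> valid;
     * ' ' = 0x20: lut_lo[0] = 0x15, lut_hi[2] = 0x01, 0x15 & 0x01 != 0 -> invalid */
    return (b64_lut_lo[c & 0x0F] & b64_lut_hi[c >> 4]) == 0;
}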
diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c
new file mode 100644
index 0000000..e868924
--- /dev/null
+++ b/src/libcryptobox/base64/base64.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+#include "base64.h"
+#include "platform_config.h"
+#include "str_util.h"
+#include "util.h"
+#include "contrib/libottery/ottery.h"
+
+extern unsigned cpu_config;
+const uint8_t base64_table_dec[256] = {
+ /* 0x00-0x1F: control characters, all invalid */
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ /* 0x20-0x2F: '+' = 62, '/' = 63 */
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
+ /* 0x30-0x3F: '0'-'9' = 52-61, '=' = 254 (padding) */
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255,
+ /* 0x40-0x5F: 'A'-'Z' = 0-25 */
+ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
+ /* 0x60-0x7F: 'a'-'z' = 26-51 */
+ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
+ /* 0x80-0xFF: all invalid */
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+};
+
+static const char base64_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+typedef struct base64_impl {
+ unsigned short enabled;
+ unsigned short min_len;
+ unsigned int cpu_flags;
+ const char *desc;
+ int (*decode)(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen);
+} base64_impl_t;
+
+#define BASE64_DECLARE(ext) \
+ int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen);
+#define BASE64_IMPL(cpuflags, min_len, desc, ext) \
+ { \
+ 0, (min_len), (cpuflags), desc, base64_decode_##ext \
+ }
+
+BASE64_DECLARE(ref);
+#define BASE64_REF BASE64_IMPL(0, 0, "ref", ref)
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(HAVE_SSE42) && defined(__x86_64__)
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+
+BASE64_DECLARE(sse42);
+#define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42)
+#endif
+#endif
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(HAVE_AVX2) && defined(__x86_64__)
+int base64_decode_avx2(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("avx2")));
+
+BASE64_DECLARE(avx2);
+#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2)
+#endif
+#endif
+
+static base64_impl_t base64_list[] = {
+ BASE64_REF,
+#ifdef BASE64_SSE42
+ BASE64_SSE42,
+#endif
+#ifdef BASE64_AVX2
+ BASE64_AVX2,
+#endif
+};
+
+static const base64_impl_t *base64_ref = &base64_list[0];
+
+const char *
+base64_load(void)
+{
+ guint i;
+ const base64_impl_t *opt_impl = base64_ref;
+
+ /* Enable reference */
+ base64_list[0].enabled = true;
+
+ if (cpu_config != 0) {
+ for (i = 1; i < G_N_ELEMENTS(base64_list); i++) {
+ if (base64_list[i].cpu_flags & cpu_config) {
+ base64_list[i].enabled = true;
+ opt_impl = &base64_list[i];
+ }
+ }
+ }
+
+
+ return opt_impl->desc;
+}
+
+gboolean
+rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen,
+ guchar *out, gsize *outlen)
+{
+ const base64_impl_t *opt_impl = base64_ref;
+
+ for (gint i = G_N_ELEMENTS(base64_list) - 1; i > 0; i--) {
+ if (base64_list[i].enabled && base64_list[i].min_len <= inlen) {
+ opt_impl = &base64_list[i];
+ break;
+ }
+ }
+
+ return opt_impl->decode(in, inlen, out, outlen);
+}
+
+double
+base64_test(bool generic, size_t niters, size_t len, size_t str_len)
+{
+ size_t cycles;
+ guchar *in, *out, *tmp;
+ gdouble t1, t2, total = 0;
+ gsize outlen;
+
+ g_assert(len > 0);
+ in = g_malloc(len);
+ tmp = g_malloc(len);
+ ottery_rand_bytes(in, len);
+
+ out = rspamd_encode_base64_fold(in, len, str_len, &outlen,
+ RSPAMD_TASK_NEWLINES_CRLF);
+
+ if (generic) {
+ base64_list[0].decode(out, outlen, tmp, &len);
+ }
+ else {
+ rspamd_cryptobox_base64_decode(out, outlen, tmp, &len);
+ }
+
+ g_assert(memcmp(in, tmp, len) == 0);
+
+ for (cycles = 0; cycles < niters; cycles++) {
+ t1 = rspamd_get_ticks(TRUE);
+ if (generic) {
+ base64_list[0].decode(out, outlen, tmp, &len);
+ }
+ else {
+ rspamd_cryptobox_base64_decode(out, outlen, tmp, &len);
+ }
+ t2 = rspamd_get_ticks(TRUE);
+ total += t2 - t1;
+ }
+
+ g_free(in);
+ g_free(tmp);
+ g_free(out);
+
+ return total;
+}
+
+
+gboolean
+rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen)
+{
+ const guchar *p, *end;
+
+ if (inlen == 0) {
+ return FALSE;
+ }
+
+ p = in;
+ end = in + inlen;
+
+ while (p < end && *p != '=') {
+ if (!g_ascii_isspace(*p)) {
+ if (base64_table_dec[*p] == 255) {
+ return FALSE;
+ }
+ }
+ p++;
+ }
+
+ return TRUE;
+}
\ No newline at end of file
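Taken together, base64_load() marks the usable implementations once at startup, and rspamd_cryptobox_base64_decode() then walks base64_list from the fastest entry down, choosing the first enabled one whose min_len does not exceed the input, so short inputs always fall back to the reference decoder. A minimal caller sketch under those semantics (the 3-bytes-per-4-characters sizing is standard base64; the helper is illustrative):

#include <glib.h>

gboolean rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen,
                                        guchar *out, gsize *outlen);

static guchar *
decode_b64(const gchar *b64, gsize b64len, gsize *declen)
{
    /* Decoded output never exceeds 3 bytes per 4 input characters */
    gsize outlen = (b64len / 4 + 1) * 3;
    guchar *out = g_malloc(outlen);

    if (!rspamd_cryptobox_base64_decode(b64, b64len, out, &outlen)) {
        /* An invalid character was found before the end of input */
        g_free(out);
        return NULL;
    }

    *declen = outlen; /* number of bytes actually produced */
    return out;
}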
diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h
new file mode 100644
index 0000000..f53c80a
--- /dev/null
+++ b/src/libcryptobox/base64/base64.h
@@ -0,0 +1,31 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_LIBCRYPTOBOX_BASE64_BASE64_H_
+#define SRC_LIBCRYPTOBOX_BASE64_BASE64_H_
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const char *base64_load(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ */
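base64_load() is the only symbol this header exports; it must run after the cryptobox CPU detection has populated cpu_config, and it returns the name of the implementation that will be dispatched to ("ref", "sse42" or "avx2"). A sketch of the intended call pattern; the logging is an assumption:

#include <stdio.h>
#include "base64.h"

static void
init_base64(void)
{
    /* Safe to call unconditionally: the reference decoder is always enabled */
    const char *impl = base64_load();
    printf("base64: using %s implementation\n", impl);
}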
diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c
new file mode 100644
index 0000000..61df68e
--- /dev/null
+++ b/src/libcryptobox/base64/ref.c
@@ -0,0 +1,241 @@
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "libutil/util.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#define INNER_LOOP_64 \
+ do { \
+ uint64_t str, res, dec; \
+ bool aligned = rspamd_is_aligned_as(c, str); \
+ while (inlen >= 13) { \
+ if (aligned) { str = *(uint64_t *) c; } \
+ else { \
+ memcpy(&str, c, sizeof(str)); \
+ } \
+ str = GUINT64_TO_BE(str); \
+ if ((dec = base64_table_dec[str >> 56]) > 63) { \
+ break; \
+ } \
+ res = dec << 58; \
+ if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 52; \
+ if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 46; \
+ if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 40; \
+ if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 34; \
+ if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 28; \
+ if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 22; \
+ if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 16; \
+ res = GUINT64_FROM_BE(res); \
+ memcpy(o, &res, sizeof(res)); \
+ c += 8; \
+ o += 6; \
+ outl += 6; \
+ inlen -= 8; \
+ } \
+ } while (0)
+
+#define INNER_LOOP_32 \
+ do { \
+ uint32_t str, res, dec; \
+ bool aligned = rspamd_is_aligned_as(c, str); \
+ while (inlen >= 8) { \
+ if (aligned) { str = *(uint32_t *) c; } \
+ else { \
+ memcpy(&str, c, sizeof(str)); \
+ } \
+ str = GUINT32_TO_BE(str); \
+ if ((dec = base64_table_dec[str >> 24]) > 63) { \
+ break; \
+ } \
+ res = dec << 26; \
+ if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 20; \
+ if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 14; \
+ if ((dec = base64_table_dec[str & 0xFF]) > 63) { \
+ break; \
+ } \
+ res |= dec << 8; \
+ res = GUINT32_FROM_BE(res); \
+ memcpy(o, &res, sizeof(res)); \
+ c += 4; \
+ o += 3; \
+ outl += 3; \
+ inlen -= 4; \
+ } \
+ } while (0)
+
+
+int base64_decode_ref(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
+{
+ ssize_t ret = 0;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
+ uint8_t q, carry;
+ size_t outl = 0;
+ size_t leftover = 0;
+
+repeat:
+ switch (leftover) {
+ for (;;) {
+ case 0:
+#if defined(__LP64__)
+ INNER_LOOP_64;
+#else
+ INNER_LOOP_32;
+#endif
+
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ carry = (uint8_t) (q << 2);
+ leftover++;
+
+ case 1:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ *o++ = carry | (q >> 4);
+ carry = (uint8_t) (q << 4);
+ leftover++;
+ outl++;
+
+ case 2:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ leftover++;
+
+ if (q == 254) {
+ if (inlen-- != 0) {
+ leftover = 0;
+ q = base64_table_dec[*c++];
+ ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+ break;
+ }
+ else {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ leftover--;
+ }
+ /* If we get here, there was an error: */
+ break;
+ }
+ *o++ = carry | (q >> 2);
+ carry = (uint8_t) (q << 6);
+ leftover++;
+ outl++;
+
+ case 3:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ /*
+ * When q == 254, the input char is '=' (padding): return 1 (success) at EOF.
+ * When q == 255, the input char is invalid: return 0 (error) at EOF.
+ */
+ if (q == 254 && inlen == 0) {
+ ret = 1;
+ leftover = 0;
+ }
+ else {
+ ret = 0;
+ }
+
+ break;
+ }
+
+ *o++ = carry | q;
+ carry = 0;
+ leftover = 0;
+ outl++;
+ }
+ }
+
+ if (!ret && inlen > 0) {
+ /* Skip to the next valid character in input */
+ while (inlen > 0 && base64_table_dec[*c] >= 254) {
+ c++;
+ inlen--;
+ }
+
+ if (inlen > 0) {
+ goto repeat;
+ }
+ }
+
+ *outlen = outl;
+
+ return ret;
+}
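The switch (leftover) wrapped around for (;;) above is a Duff's-device-style resume point: when the word-sized fast path bails out mid-quad, control re-enters the loop at the phase (0-3) where decoding stopped, with the pending bits held in carry. Each full quad of table values packs into three output bytes; a worked scalar sketch of that repacking (the helper is illustrative, not part of the patch):

#include <stdio.h>

/* Pack four 6-bit values (one base64 quad) into three bytes, mirroring
 * cases 0-3 above: each phase emits the bits it owns and hands the
 * remainder to the next phase via `carry`. */
static void
pack_quad(const unsigned char q[4], unsigned char out[3])
{
    out[0] = (unsigned char) ((q[0] << 2) | (q[1] >> 4)); /* case 0 -> 1 */
    out[1] = (unsigned char) ((q[1] << 4) | (q[2] >> 2)); /* case 1 -> 2 */
    out[2] = (unsigned char) ((q[2] << 6) | q[3]);        /* case 2 -> 3 */
}

int main(void)
{
    /* "TWFu" maps through base64_table_dec to {19, 22, 5, 46} */
    const unsigned char q[4] = {19, 22, 5, 46};
    unsigned char out[3];

    pack_quad(q, out);
    printf("%c%c%c\n", out[0], out[1], out[2]); /* prints "Man" */
    return 0;
}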
diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c
new file mode 100644
index 0000000..36070ab
--- /dev/null
+++ b/src/libcryptobox/base64/sse42.c
@@ -0,0 +1,268 @@
+/*-
+ * Copyright 2017 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*-
+Copyright (c) 2013-2015, Alfred Klomp
+Copyright (c) 2016, Vsevolod Stakhov
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cryptobox.h"
+
+extern const uint8_t base64_table_dec[256];
+
+#ifdef RSPAMD_HAS_TARGET_ATTR
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#endif
+#ifndef __SSE2__
+#define __SSE2__
+#endif
+#ifndef __SSE__
+#define __SSE__
+#endif
+#ifndef __SSE4_2__
+#define __SSE4_2__
+#endif
+#ifndef __SSE4_1__
+#define __SSE4_1__
+#endif
+#ifndef __SSSE3__
+#define __SSSE3__
+#endif
+#include <xmmintrin.h>
+#include <nmmintrin.h>
+
+
+static inline __m128i
+dec_reshuffle(__m128i in) __attribute__((__target__("sse4.2")));
+
+static inline __m128i dec_reshuffle(__m128i in)
+{
+ // Mask in a single byte per shift:
+ const __m128i maskB2 = _mm_set1_epi32(0x003F0000);
+ const __m128i maskB1 = _mm_set1_epi32(0x00003F00);
+
+ // Pack bytes together:
+ __m128i out = _mm_srli_epi32(in, 16);
+
+ out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2));
+
+ out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12));
+
+ out = _mm_or_si128(out, _mm_slli_epi32(in, 26));
+
+ // Reshuffle and repack into 12-byte output format:
+ return _mm_shuffle_epi8(out, _mm_setr_epi8(
+ 3, 2, 1,
+ 7, 6, 5,
+ 11, 10, 9,
+ 15, 14, 13,
+ -1, -1, -1, -1));
+}
+
+#define CMPGT(s, n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n))
+
+#define INNER_LOOP_SSE42 \
+ while (inlen >= 24) { \
+ __m128i str = _mm_loadu_si128((__m128i *) c); \
+ const __m128i lut = _mm_setr_epi8( \
+ 19, 16, 4, 4, \
+ 4, 4, 4, 4, \
+ 4, 4, 4, 4, \
+ 0, 0, -71, -65); \
+ const __m128i range = _mm_setr_epi8( \
+ '+', '+', \
+ '+', '+', \
+ '+', '+', \
+ '+', '+', \
+ '/', '/', \
+ '0', '9', \
+ 'A', 'Z', \
+ 'a', 'z'); \
+ if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \
+ seen_error = true; \
+ break; \
+ } \
+ __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
+ __m128i mask45 = CMPGT(str, 64); \
+ __m128i mask5 = CMPGT(str, 96); \
+ indices = _mm_andnot_si128(mask45, indices); \
+ mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \
+ indices = _mm_add_epi8(indices, mask45); \
+ indices = _mm_add_epi8(indices, mask5); \
+ __m128i delta = _mm_shuffle_epi8(lut, indices); \
+ str = _mm_add_epi8(str, delta); \
+ str = dec_reshuffle(str); \
+ _mm_storeu_si128((__m128i *) o, str); \
+ c += 16; \
+ o += 12; \
+ outl += 12; \
+ inlen -= 16; \
+ }
+
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2")));
+int base64_decode_sse42(const char *in, size_t inlen,
+ unsigned char *out, size_t *outlen)
+{
+ ssize_t ret = 0;
+ const uint8_t *c = (const uint8_t *) in;
+ uint8_t *o = (uint8_t *) out;
+ uint8_t q, carry;
+ size_t outl = 0;
+ size_t leftover = 0;
+ bool seen_error = false;
+
+repeat:
+ switch (leftover) {
+ for (;;) {
+ case 0:
+ if (G_LIKELY(!seen_error)) {
+ INNER_LOOP_SSE42
+ }
+
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ carry = q << 2;
+ leftover++;
+
+ case 1:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ ret = 0;
+ break;
+ }
+ *o++ = carry | (q >> 4);
+ carry = q << 4;
+ leftover++;
+ outl++;
+
+ case 2:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ leftover++;
+
+ if (q == 254) {
+ if (inlen-- != 0) {
+ leftover = 0;
+ q = base64_table_dec[*c++];
+ ret = ((q == 254) && (inlen == 0)) ? 1 : 0;
+ break;
+ }
+ else {
+ ret = 1;
+ break;
+ }
+ }
+ else {
+ leftover--;
+ }
+ /* If we get here, there was an error: */
+ break;
+ }
+ *o++ = carry | (q >> 2);
+ carry = q << 6;
+ leftover++;
+ outl++;
+
+ case 3:
+ if (inlen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = base64_table_dec[*c++]) >= 254) {
+ /*
+ * When q == 254, the input char is '=' (padding): return 1 (success) at EOF.
+ * When q == 255, the input char is invalid: return 0 (error) at EOF.
+ */
+ if (q == 254 && inlen == 0) {
+ ret = 1;
+ leftover = 0;
+ }
+ else {
+ ret = 0;
+ }
+
+ break;
+ }
+
+ *o++ = carry | q;
+ carry = 0;
+ leftover = 0;
+ outl++;
+ }
+ }
+
+ if (!ret && inlen > 0) {
+ /* Skip to the next valid character in input */
+ while (inlen > 0 && base64_table_dec[*c] >= 254) {
+ c++;
+ inlen--;
+ }
+
+ if (inlen > 0) {
+ seen_error = false;
+ goto repeat;
+ }
+ }
+
+ *outlen = outl;
+
+ return ret;
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC pop_options
+#endif
+#endif
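The _mm_cmpistrc test in INNER_LOOP_SSE42 is a ranged string compare: the range constant packs eight (low, high) byte pairs, and with _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY the intrinsic reports whether any input byte falls in none of the ranges. A rough scalar equivalent (ignoring _mm_cmpistrc's implicit NUL termination; the helper is illustrative):

#include <stddef.h>

/* Mirrors the `range` constant above: the '+'..'+' pair is repeated to
 * fill the 16-byte operand, followed by '/'..'/', '0'..'9', 'A'..'Z'
 * and 'a'..'z'. */
static int
sse42_block_has_invalid_byte(const unsigned char *p)
{
    for (size_t i = 0; i < 16; i++) {
        unsigned char c = p[i];
        int in_range = (c == '+') || (c == '/') ||
                       (c >= '0' && c <= '9') ||
                       (c >= 'A' && c <= 'Z') ||
                       (c >= 'a' && c <= 'z');

        if (!in_range) {
            return 1; /* the SIMD loop sets seen_error and falls back */
        }
    }

    return 0;
}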