diff options
Diffstat (limited to 'src/libcryptobox/base64')
-rw-r--r-- | src/libcryptobox/base64/avx2.c | 287 | ||||
-rw-r--r-- | src/libcryptobox/base64/base64.c | 445 | ||||
-rw-r--r-- | src/libcryptobox/base64/base64.h | 31 | ||||
-rw-r--r-- | src/libcryptobox/base64/ref.c | 241 | ||||
-rw-r--r-- | src/libcryptobox/base64/sse42.c | 268 |
5 files changed, 1272 insertions, 0 deletions
diff --git a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c new file mode 100644 index 0000000..38abffc --- /dev/null +++ b/src/libcryptobox/base64/avx2.c @@ -0,0 +1,287 @@ +/*- + * Copyright 2018 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- +Copyright (c) 2013-2015, Alfred Klomp +Copyright (c) 2018, Vsevolod Stakhov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "cryptobox.h" + +extern const uint8_t base64_table_dec[256]; + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("avx2") +#endif +#ifndef __SSE2__ +#define __SSE2__ +#endif +#ifndef __SSE__ +#define __SSE__ +#endif +#ifndef __SSE4_2__ +#define __SSE4_2__ +#endif +#ifndef __SSE4_1__ +#define __SSE4_1__ +#endif +#ifndef __SSEE3__ +#define __SSEE3__ +#endif +#ifndef __AVX__ +#define __AVX__ +#endif +#ifndef __AVX2__ +#define __AVX2__ +#endif + +#include <immintrin.h> + +#define CMPGT(s, n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) +#define CMPEQ(s, n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) +#define REPLACE(s, n) _mm256_and_si256((s), _mm256_set1_epi8(n)) +#define RANGE(s, a, b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) -1)) + +static inline __m256i +dec_reshuffle(__m256i in) __attribute__((__target__("avx2"))); + +static inline __m256i +dec_reshuffle(__m256i in) +{ + // in, lower lane, bits, upper case are most significant bits, lower case are least significant bits: + // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ + // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG + // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD + // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA + + const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); + // 0000kkkk LLllllll 0000JJJJ JJjjKKKK + // 0000hhhh IIiiiiii 0000GGGG GGggHHHH + // 0000eeee FFffffff 0000DDDD DDddEEEE + // 0000bbbb CCcccccc 0000AAAA AAaaBBBB + + __m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); + // 00000000 JJJJJJjj KKKKkkkk LLllllll + // 00000000 GGGGGGgg HHHHhhhh IIiiiiii + // 00000000 DDDDDDdd EEEEeeee FFffffff + // 00000000 AAAAAAaa BBBBbbbb CCcccccc + + // Pack bytes together in each lane: + out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); + // 00000000 00000000 00000000 00000000 + // LLllllll KKKKkkkk JJJJJJjj IIiiiiii + // HHHHhhhh GGGGGGgg FFffffff EEEEeeee + // DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa + + // Pack lanes + return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); +} + + +#define INNER_LOOP_AVX2 \ + while (inlen >= 45) { \ + __m256i str = _mm256_loadu_si256((__m256i *) c); \ + const __m256i lut_lo = _mm256_setr_epi8( \ + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, \ + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \ + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \ + const __m256i lut_hi = _mm256_setr_epi8( \ + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \ + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \ + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \ + const __m256i lut_roll = _mm256_setr_epi8( \ + 0, 16, 19, 4, -65, -65, -71, -71, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 16, 19, 4, -65, -65, -71, -71, \ + 0, 0, 0, 0, 0, 0, 0, 0); \ + const __m256i mask_2F = _mm256_set1_epi8(0x2f); \ + const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F); \ + const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F); \ + const __m256i hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); \ + const __m256i lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); \ + const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F); \ + const __m256i roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \ + if (!_mm256_testz_si256(lo, hi)) { \ + seen_error = true; \ + break; \ + } \ + str = _mm256_add_epi8(str, roll); \ + str = dec_reshuffle(str); \ + _mm256_storeu_si256((__m256i *) o, str); \ + c += 32; \ + o += 24; \ + outl += 24; \ + inlen -= 32; \ + } + +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) +{ + ssize_t ret = 0; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; + uint8_t q, carry; + size_t outl = 0; + size_t leftover = 0; + bool seen_error = false; + +repeat: + switch (leftover) { + for (;;) { + case 0: + if (G_LIKELY(!seen_error)) { + INNER_LOOP_AVX2 + } + + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + carry = q << 2; + leftover++; + + case 1: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + *o++ = carry | (q >> 4); + carry = q << 4; + leftover++; + outl++; + + case 2: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + leftover++; + + if (q == 254) { + if (inlen-- != 0) { + leftover = 0; + q = base64_table_dec[*c++]; + ret = ((q == 254) && (inlen == 0)) ? 1 : 0; + break; + } + else { + ret = 1; + break; + } + } + else { + leftover--; + } + /* If we get here, there was an error: */ + break; + } + *o++ = carry | (q >> 2); + carry = q << 6; + leftover++; + outl++; + + case 3: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + /* + * When q == 254, the input char is '='. Return 1 and EOF. + * When q == 255, the input char is invalid. Return 0 and EOF. + */ + if (q == 254 && inlen == 0) { + ret = 1; + leftover = 0; + } + else { + ret = 0; + } + + break; + } + + *o++ = carry | q; + carry = 0; + leftover = 0; + outl++; + } + } + + if (!ret && inlen > 0) { + /* Skip to the next valid character in input */ + while (inlen > 0 && base64_table_dec[*c] >= 254) { + c++; + inlen--; + } + + if (inlen > 0) { + seen_error = false; + goto repeat; + } + } + + *outlen = outl; + + return ret; +} + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif +#endif diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c new file mode 100644 index 0000000..e868924 --- /dev/null +++ b/src/libcryptobox/base64/base64.c @@ -0,0 +1,445 @@ +/* + * Copyright 2023 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "config.h" +#include "cryptobox.h" +#include "base64.h" +#include "platform_config.h" +#include "str_util.h" +#include "util.h" +#include "contrib/libottery/ottery.h" + +extern unsigned cpu_config; +const uint8_t + base64_table_dec[256] = + { + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 62, + 255, + 255, + 255, + 63, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 255, + 255, + 255, + 254, + 255, + 255, + 255, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 255, + 255, + 255, + 255, + 255, + 255, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, + 255, +}; + +static const char base64_alphabet[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; + +typedef struct base64_impl { + unsigned short enabled; + unsigned short min_len; + unsigned int cpu_flags; + const char *desc; + int (*decode)(const char *in, size_t inlen, + unsigned char *out, size_t *outlen); +} base64_impl_t; + +#define BASE64_DECLARE(ext) \ + int base64_decode_##ext(const char *in, size_t inlen, unsigned char *out, size_t *outlen); +#define BASE64_IMPL(cpuflags, min_len, desc, ext) \ + { \ + 0, (min_len), (cpuflags), desc, base64_decode_##ext \ + } + +BASE64_DECLARE(ref); +#define BASE64_REF BASE64_IMPL(0, 0, "ref", ref) + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(HAVE_SSE42) && defined(__x86_64__) +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); + +BASE64_DECLARE(sse42); +#define BASE64_SSE42 BASE64_IMPL(CPUID_SSE42, 24, "sse42", sse42) +#endif +#endif + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(HAVE_AVX2) && defined(__x86_64__) +int base64_decode_avx2(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("avx2"))); + +BASE64_DECLARE(avx2); +#define BASE64_AVX2 BASE64_IMPL(CPUID_AVX2, 128, "avx2", avx2) +#endif +#endif + +static base64_impl_t base64_list[] = { + BASE64_REF, +#ifdef BASE64_SSE42 + BASE64_SSE42, +#endif +#ifdef BASE64_AVX2 + BASE64_AVX2, +#endif +}; + +static const base64_impl_t *base64_ref = &base64_list[0]; + +const char * +base64_load(void) +{ + guint i; + const base64_impl_t *opt_impl = base64_ref; + + /* Enable reference */ + base64_list[0].enabled = true; + + if (cpu_config != 0) { + for (i = 1; i < G_N_ELEMENTS(base64_list); i++) { + if (base64_list[i].cpu_flags & cpu_config) { + base64_list[i].enabled = true; + opt_impl = &base64_list[i]; + } + } + } + + + return opt_impl->desc; +} + +gboolean +rspamd_cryptobox_base64_decode(const gchar *in, gsize inlen, + guchar *out, gsize *outlen) +{ + const base64_impl_t *opt_impl = base64_ref; + + for (gint i = G_N_ELEMENTS(base64_list) - 1; i > 0; i--) { + if (base64_list[i].enabled && base64_list[i].min_len <= inlen) { + opt_impl = &base64_list[i]; + break; + } + } + + return opt_impl->decode(in, inlen, out, outlen); +} + +double +base64_test(bool generic, size_t niters, size_t len, size_t str_len) +{ + size_t cycles; + guchar *in, *out, *tmp; + gdouble t1, t2, total = 0; + gsize outlen; + + g_assert(len > 0); + in = g_malloc(len); + tmp = g_malloc(len); + ottery_rand_bytes(in, len); + + out = rspamd_encode_base64_fold(in, len, str_len, &outlen, + RSPAMD_TASK_NEWLINES_CRLF); + + if (generic) { + base64_list[0].decode(out, outlen, tmp, &len); + } + else { + rspamd_cryptobox_base64_decode(out, outlen, tmp, &len); + } + + g_assert(memcmp(in, tmp, len) == 0); + + for (cycles = 0; cycles < niters; cycles++) { + t1 = rspamd_get_ticks(TRUE); + if (generic) { + base64_list[0].decode(out, outlen, tmp, &len); + } + else { + rspamd_cryptobox_base64_decode(out, outlen, tmp, &len); + } + t2 = rspamd_get_ticks(TRUE); + total += t2 - t1; + } + + g_free(in); + g_free(tmp); + g_free(out); + + return total; +} + + +gboolean +rspamd_cryptobox_base64_is_valid(const gchar *in, gsize inlen) +{ + const guchar *p, *end; + + if (inlen == 0) { + return FALSE; + } + + p = in; + end = in + inlen; + + while (p < end && *p != '=') { + if (!g_ascii_isspace(*p)) { + if (base64_table_dec[*p] == 255) { + return FALSE; + } + } + p++; + } + + return TRUE; +}
\ No newline at end of file diff --git a/src/libcryptobox/base64/base64.h b/src/libcryptobox/base64/base64.h new file mode 100644 index 0000000..f53c80a --- /dev/null +++ b/src/libcryptobox/base64/base64.h @@ -0,0 +1,31 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ +#define SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +const char *base64_load(void); + +#ifdef __cplusplus +} +#endif + +#endif /* SRC_LIBCRYPTOBOX_BASE64_BASE64_H_ */ diff --git a/src/libcryptobox/base64/ref.c b/src/libcryptobox/base64/ref.c new file mode 100644 index 0000000..61df68e --- /dev/null +++ b/src/libcryptobox/base64/ref.c @@ -0,0 +1,241 @@ +/*- +Copyright (c) 2013-2015, Alfred Klomp +Copyright (c) 2016, Vsevolod Stakhov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "libutil/util.h" + +extern const uint8_t base64_table_dec[256]; + +#define INNER_LOOP_64 \ + do { \ + uint64_t str, res, dec; \ + bool aligned = rspamd_is_aligned_as(c, str); \ + while (inlen >= 13) { \ + if (aligned) { str = *(uint64_t *) c; } \ + else { \ + memcpy(&str, c, sizeof(str)); \ + } \ + str = GUINT64_TO_BE(str); \ + if ((dec = base64_table_dec[str >> 56]) > 63) { \ + break; \ + } \ + res = dec << 58; \ + if ((dec = base64_table_dec[(str >> 48) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 52; \ + if ((dec = base64_table_dec[(str >> 40) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 46; \ + if ((dec = base64_table_dec[(str >> 32) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 40; \ + if ((dec = base64_table_dec[(str >> 24) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 34; \ + if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 28; \ + if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 22; \ + if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 16; \ + res = GUINT64_FROM_BE(res); \ + memcpy(o, &res, sizeof(res)); \ + c += 8; \ + o += 6; \ + outl += 6; \ + inlen -= 8; \ + } \ + } while (0) + +#define INNER_LOOP_32 \ + do { \ + uint32_t str, res, dec; \ + bool aligned = rspamd_is_aligned_as(c, str); \ + while (inlen >= 8) { \ + if (aligned) { str = *(uint32_t *) c; } \ + else { \ + memcpy(&str, c, sizeof(str)); \ + } \ + str = GUINT32_TO_BE(str); \ + if ((dec = base64_table_dec[str >> 24]) > 63) { \ + break; \ + } \ + res = dec << 26; \ + if ((dec = base64_table_dec[(str >> 16) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 20; \ + if ((dec = base64_table_dec[(str >> 8) & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 14; \ + if ((dec = base64_table_dec[str & 0xFF]) > 63) { \ + break; \ + } \ + res |= dec << 8; \ + res = GUINT32_FROM_BE(res); \ + memcpy(o, &res, sizeof(res)); \ + c += 4; \ + o += 3; \ + outl += 3; \ + inlen -= 4; \ + } \ + } while (0) + + +int base64_decode_ref(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) +{ + ssize_t ret = 0; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; + uint8_t q, carry; + size_t outl = 0; + size_t leftover = 0; + +repeat: + switch (leftover) { + for (;;) { + case 0: +#if defined(__LP64__) + INNER_LOOP_64; +#else + INNER_LOOP_32; +#endif + + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + carry = (uint8_t) (q << 2); + leftover++; + + case 1: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + *o++ = carry | (q >> 4); + carry = (uint8_t) (q << 4); + leftover++; + outl++; + + case 2: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + leftover++; + + if (q == 254) { + if (inlen-- != 0) { + leftover = 0; + q = base64_table_dec[*c++]; + ret = ((q == 254) && (inlen == 0)) ? 1 : 0; + break; + } + else { + ret = 1; + break; + } + } + else { + leftover--; + } + /* If we get here, there was an error: */ + break; + } + *o++ = carry | (q >> 2); + carry = (uint8_t) (q << 6); + leftover++; + outl++; + + case 3: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + /* + * When q == 254, the input char is '='. Return 1 and EOF. + * When q == 255, the input char is invalid. Return 0 and EOF. + */ + if (q == 254 && inlen == 0) { + ret = 1; + leftover = 0; + } + else { + ret = 0; + } + + break; + } + + *o++ = carry | q; + carry = 0; + leftover = 0; + outl++; + } + } + + if (!ret && inlen > 0) { + /* Skip to the next valid character in input */ + while (inlen > 0 && base64_table_dec[*c] >= 254) { + c++; + inlen--; + } + + if (inlen > 0) { + goto repeat; + } + } + + *outlen = outl; + + return ret; +} diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c new file mode 100644 index 0000000..36070ab --- /dev/null +++ b/src/libcryptobox/base64/sse42.c @@ -0,0 +1,268 @@ +/*- + * Copyright 2017 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*- +Copyright (c) 2013-2015, Alfred Klomp +Copyright (c) 2016, Vsevolod Stakhov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "cryptobox.h" + +extern const uint8_t base64_table_dec[256]; + +#ifdef RSPAMD_HAS_TARGET_ATTR +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("sse4.2") +#endif +#ifndef __SSE2__ +#define __SSE2__ +#endif +#ifndef __SSE__ +#define __SSE__ +#endif +#ifndef __SSE4_2__ +#define __SSE4_2__ +#endif +#ifndef __SSE4_1__ +#define __SSE4_1__ +#endif +#ifndef __SSEE3__ +#define __SSEE3__ +#endif +#include <xmmintrin.h> +#include <nmmintrin.h> + + +static inline __m128i +dec_reshuffle(__m128i in) __attribute__((__target__("sse4.2"))); + +static inline __m128i dec_reshuffle(__m128i in) +{ + // Mask in a single byte per shift: + const __m128i maskB2 = _mm_set1_epi32(0x003F0000); + const __m128i maskB1 = _mm_set1_epi32(0x00003F00); + + // Pack bytes together: + __m128i out = _mm_srli_epi32(in, 16); + + out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2)); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12)); + + out = _mm_or_si128(out, _mm_slli_epi32(in, 26)); + + // Reshuffle and repack into 12-byte output format: + return _mm_shuffle_epi8(out, _mm_setr_epi8( + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); +} + +#define CMPGT(s, n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) + +#define INNER_LOOP_SSE42 \ + while (inlen >= 24) { \ + __m128i str = _mm_loadu_si128((__m128i *) c); \ + const __m128i lut = _mm_setr_epi8( \ + 19, 16, 4, 4, \ + 4, 4, 4, 4, \ + 4, 4, 4, 4, \ + 0, 0, -71, -65); \ + const __m128i range = _mm_setr_epi8( \ + '+', '+', \ + '+', '+', \ + '+', '+', \ + '+', '+', \ + '/', '/', \ + '0', '9', \ + 'A', 'Z', \ + 'a', 'z'); \ + if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \ + seen_error = true; \ + break; \ + } \ + __m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \ + __m128i mask45 = CMPGT(str, 64); \ + __m128i mask5 = CMPGT(str, 96); \ + indices = _mm_andnot_si128(mask45, indices); \ + mask45 = _mm_add_epi8(_mm_slli_epi16(_mm_abs_epi8(mask45), 4), mask45); \ + indices = _mm_add_epi8(indices, mask45); \ + indices = _mm_add_epi8(indices, mask5); \ + __m128i delta = _mm_shuffle_epi8(lut, indices); \ + str = _mm_add_epi8(str, delta); \ + str = dec_reshuffle(str); \ + _mm_storeu_si128((__m128i *) o, str); \ + c += 16; \ + o += 12; \ + outl += 12; \ + inlen -= 16; \ + } + +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) __attribute__((__target__("sse4.2"))); +int base64_decode_sse42(const char *in, size_t inlen, + unsigned char *out, size_t *outlen) +{ + ssize_t ret = 0; + const uint8_t *c = (const uint8_t *) in; + uint8_t *o = (uint8_t *) out; + uint8_t q, carry; + size_t outl = 0; + size_t leftover = 0; + bool seen_error = false; + +repeat: + switch (leftover) { + for (;;) { + case 0: + if (G_LIKELY(!seen_error)) { + INNER_LOOP_SSE42 + } + + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + carry = q << 2; + leftover++; + + case 1: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + ret = 0; + break; + } + *o++ = carry | (q >> 4); + carry = q << 4; + leftover++; + outl++; + + case 2: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + leftover++; + + if (q == 254) { + if (inlen-- != 0) { + leftover = 0; + q = base64_table_dec[*c++]; + ret = ((q == 254) && (inlen == 0)) ? 1 : 0; + break; + } + else { + ret = 1; + break; + } + } + else { + leftover--; + } + /* If we get here, there was an error: */ + break; + } + *o++ = carry | (q >> 2); + carry = q << 6; + leftover++; + outl++; + + case 3: + if (inlen-- == 0) { + ret = 1; + break; + } + if ((q = base64_table_dec[*c++]) >= 254) { + /* + * When q == 254, the input char is '='. Return 1 and EOF. + * When q == 255, the input char is invalid. Return 0 and EOF. + */ + if (q == 254 && inlen == 0) { + ret = 1; + leftover = 0; + } + else { + ret = 0; + } + + break; + } + + *o++ = carry | q; + carry = 0; + leftover = 0; + outl++; + } + } + + if (!ret && inlen > 0) { + /* Skip to the next valid character in input */ + while (inlen > 0 && base64_table_dec[*c] >= 254) { + c++; + inlen--; + } + + if (inlen > 0) { + seen_error = false; + goto repeat; + } + } + + *outlen = outl; + + return ret; +} + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif +#endif |