diff options
Diffstat (limited to '')
-rw-r--r-- | src/third-party/base64/lib/arch/ssse3/enc_loop.c | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/src/third-party/base64/lib/arch/ssse3/enc_loop.c b/src/third-party/base64/lib/arch/ssse3/enc_loop.c new file mode 100644 index 0000000..6de652e --- /dev/null +++ b/src/third-party/base64/lib/arch/ssse3/enc_loop.c @@ -0,0 +1,67 @@ +static inline void +enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o) +{ + // Load input: + __m128i str = _mm_loadu_si128((__m128i *) *s); + + // Reshuffle: + str = enc_reshuffle(str); + + // Translate reshuffled bytes to the Base64 alphabet: + str = enc_translate(str); + + // Store: + _mm_storeu_si128((__m128i *) *o, str); + + *s += 12; + *o += 16; +} + +static inline void +enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen) +{ + if (*slen < 16) { + return; + } + + // Process blocks of 12 bytes at a time. Because blocks are loaded 16 + // bytes at a time, ensure that there will be at least 4 remaining + // bytes after the last round, so that the final read will not pass + // beyond the bounds of the input buffer: + size_t rounds = (*slen - 4) / 12; + + *slen -= rounds * 12; // 12 bytes consumed per round + *olen += rounds * 16; // 16 bytes produced per round + + do { + if (rounds >= 8) { + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + rounds -= 8; + continue; + } + if (rounds >= 4) { + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + rounds -= 4; + continue; + } + if (rounds >= 2) { + enc_loop_ssse3_inner(s, o); + enc_loop_ssse3_inner(s, o); + rounds -= 2; + continue; + } + enc_loop_ssse3_inner(s, o); + break; + + } while (rounds > 0); +} |