diff options
Diffstat (limited to '')
-rw-r--r-- | src/third-party/base64/lib/arch/neon32/enc_translate.c | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/src/third-party/base64/lib/arch/neon32/enc_translate.c b/src/third-party/base64/lib/arch/neon32/enc_translate.c new file mode 100644 index 0000000..e616d54 --- /dev/null +++ b/src/third-party/base64/lib/arch/neon32/enc_translate.c @@ -0,0 +1,57 @@ +static inline uint8x16x4_t +enc_translate (const uint8x16x4_t in) +{ + // A lookup table containing the absolute offsets for all ranges: + const uint8x16_t lut = { + 65U, 71U, 252U, 252U, + 252U, 252U, 252U, 252U, + 252U, 252U, 252U, 252U, + 237U, 240U, 0U, 0U + }; + + const uint8x16_t offset = vdupq_n_u8(51); + + uint8x16x4_t indices, mask, delta, out; + + // Translate values 0..63 to the Base64 alphabet. There are five sets: + // # From To Abs Index Characters + // 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 [2..11] 0123456789 + // 3 [62] [43] -19 12 + + // 4 [63] [47] -16 13 / + + // Create LUT indices from input: + // the index for range #0 is right, others are 1 less than expected: + indices.val[0] = vqsubq_u8(in.val[0], offset); + indices.val[1] = vqsubq_u8(in.val[1], offset); + indices.val[2] = vqsubq_u8(in.val[2], offset); + indices.val[3] = vqsubq_u8(in.val[3], offset); + + // mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: + mask.val[0] = vcgtq_u8(in.val[0], vdupq_n_u8(25)); + mask.val[1] = vcgtq_u8(in.val[1], vdupq_n_u8(25)); + mask.val[2] = vcgtq_u8(in.val[2], vdupq_n_u8(25)); + mask.val[3] = vcgtq_u8(in.val[3], vdupq_n_u8(25)); + + // Subtract -1, so add 1 to indices for range #[1..4], All indices are + // now correct: + indices.val[0] = vsubq_u8(indices.val[0], mask.val[0]); + indices.val[1] = vsubq_u8(indices.val[1], mask.val[1]); + indices.val[2] = vsubq_u8(indices.val[2], mask.val[2]); + indices.val[3] = vsubq_u8(indices.val[3], mask.val[3]); + + // Lookup delta values: + delta.val[0] = vqtbl1q_u8(lut, indices.val[0]); + delta.val[1] = vqtbl1q_u8(lut, indices.val[1]); + delta.val[2] = vqtbl1q_u8(lut, indices.val[2]); + delta.val[3] = vqtbl1q_u8(lut, indices.val[3]); + + // Add delta values: + out.val[0] = vaddq_u8(in.val[0], delta.val[0]); + out.val[1] = vaddq_u8(in.val[1], delta.val[1]); + out.val[2] = vaddq_u8(in.val[2], delta.val[2]); + out.val[3] = vaddq_u8(in.val[3], delta.val[3]); + + return out; +} |