summaryrefslogtreecommitdiffstats
path: root/src/third-party/base64/lib/arch/neon32/enc_translate.c
blob: e616d54bc15dc7b54b40364a6918eeb671e8ad41 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
static inline uint8x16x4_t
enc_translate (const uint8x16x4_t in)
{
	// A lookup table containing the absolute offsets for all ranges:
	const uint8x16_t lut = {
		 65U,  71U, 252U, 252U,
		252U, 252U, 252U, 252U,
		252U, 252U, 252U, 252U,
		237U, 240U,   0U,   0U
	};

	const uint8x16_t offset = vdupq_n_u8(51);

	uint8x16x4_t indices, mask, delta, out;

	// Translate values 0..63 to the Base64 alphabet. There are five sets:
	// #  From      To         Abs    Index  Characters
	// 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
	// 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
	// 2  [52..61]  [48..57]    -4  [2..11]  0123456789
	// 3  [62]      [43]       -19       12  +
	// 4  [63]      [47]       -16       13  /

	// Create LUT indices from input:
	// the index for range #0 is right, others are 1 less than expected:
	indices.val[0] = vqsubq_u8(in.val[0], offset);
	indices.val[1] = vqsubq_u8(in.val[1], offset);
	indices.val[2] = vqsubq_u8(in.val[2], offset);
	indices.val[3] = vqsubq_u8(in.val[3], offset);

	// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
	mask.val[0] = vcgtq_u8(in.val[0], vdupq_n_u8(25));
	mask.val[1] = vcgtq_u8(in.val[1], vdupq_n_u8(25));
	mask.val[2] = vcgtq_u8(in.val[2], vdupq_n_u8(25));
	mask.val[3] = vcgtq_u8(in.val[3], vdupq_n_u8(25));

	// Subtract -1, so add 1 to indices for range #[1..4], All indices are
	// now correct:
	indices.val[0] = vsubq_u8(indices.val[0], mask.val[0]);
	indices.val[1] = vsubq_u8(indices.val[1], mask.val[1]);
	indices.val[2] = vsubq_u8(indices.val[2], mask.val[2]);
	indices.val[3] = vsubq_u8(indices.val[3], mask.val[3]);

	// Lookup delta values:
	delta.val[0] = vqtbl1q_u8(lut, indices.val[0]);
	delta.val[1] = vqtbl1q_u8(lut, indices.val[1]);
	delta.val[2] = vqtbl1q_u8(lut, indices.val[2]);
	delta.val[3] = vqtbl1q_u8(lut, indices.val[3]);

	// Add delta values:
	out.val[0] = vaddq_u8(in.val[0], delta.val[0]);
	out.val[1] = vaddq_u8(in.val[1], delta.val[1]);
	out.val[2] = vaddq_u8(in.val[2], delta.val[2]);
	out.val[3] = vaddq_u8(in.val[3], delta.val[3]);

	return out;
}