1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
/*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SKIA_OPTS_INTERNAL_H
#define SKIA_OPTS_INTERNAL_H
#include <string.h>
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
#include <immintrin.h>
#endif
namespace SK_OPTS_NS {
// Portable RGB1 -> RGB packing.
//
// For each of the `count` 32-bit values in `src`, writes three bytes to `dst`:
// bits 0-7, then bits 8-15, then bits 16-23. Bits 24-31 are discarded.
// Writes exactly 3 * count bytes; does nothing when count <= 0.
static void RGB1_to_RGB_portable(uint8_t dst[], const uint32_t* src, int count) {
    uint8_t* out = dst;
    for (const uint32_t* in = src; count > 0; --count, ++in) {
        // The pixel is re-read for every store, exactly as the indexed form does.
        out[0] = (uint8_t)(*in >> 0);
        out[1] = (uint8_t)(*in >> 8);
        out[2] = (uint8_t)(*in >> 16);
        out += 3;
    }
}
// Portable RGB1 -> R extraction.
//
// Copies the lowest byte (bits 0-7) of each of the `count` 32-bit values in
// `src` into consecutive bytes of `dst`. Does nothing when count <= 0.
static void RGB1_to_R_portable(uint8_t dst[], const uint32_t* src, int count) {
    int idx = 0;
    while (idx < count) {
        dst[idx] = (uint8_t)(src[idx] & 0xFF);
        ++idx;
    }
}
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
// SSSE3 RGB1 -> RGB packing.
//
// Converts four 32-bit pixels per iteration by shuffling the three low bytes
// of each pixel into the first 12 bytes of a vector, then hands whatever is
// left to RGB1_to_RGB_portable().
inline void RGB1_to_RGB(uint8_t dst[], const uint32_t* src, int count) {
    // Used as a placeholder in the shuffle control; its value is irrelevant
    // because the four bytes it produces are overwritten by later output.
    const uint8_t kDontCare = 0xFF;
    const __m128i kDropFourthByte = _mm_setr_epi8(0, 1, 2,
                                                  4, 5, 6,
                                                  8, 9, 10,
                                                  12, 13, 14,
                                                  kDontCare, kDontCare, kDontCare, kDontCare);

    // Each iteration produces 12 useful bytes but the store below writes 16.
    // Requiring at least 6 remaining pixels (18 output bytes) keeps those 4
    // extra bytes inside the output buffer.
    for (; count >= 6; count -= 4, src += 4, dst += 4 * 3) {
        const __m128i fourPixels = _mm_loadu_si128((const __m128i*) src);
        const __m128i packed = _mm_shuffle_epi8(fourPixels, kDropFourthByte);
        // Store 4 pixels (plus 4 scratch bytes).
        _mm_storeu_si128((__m128i*) dst, packed);
    }

    // Finish the remaining pixels one at a time.
    RGB1_to_RGB_portable(dst, src, count);
}
// SSSE3 RGB1 -> R extraction.
//
// Converts four 32-bit pixels per iteration by gathering the lowest byte of
// each pixel into the low 32 bits of a vector and writing those 4 bytes to
// dst, then hands whatever is left to RGB1_to_R_portable().
inline void RGB1_to_R(uint8_t dst[], const uint32_t* src, int count) {
    const uint8_t X = 0xFF;  // Used as a placeholder. The value of X is irrelevant:
                             // only the low four result bytes are ever stored.
    const __m128i pack = _mm_setr_epi8(0, 4, 8, 12, X, X, X, X, X, X, X, X, X, X, X, X);

    while (count >= 4) {
        const __m128i rgba = _mm_loadu_si128((const __m128i*) src);
        const __m128i r = _mm_shuffle_epi8(rgba, pack);

        // Store 4 pixels. dst is a byte buffer with no alignment guarantee, so
        // copy the bytes with memcpy rather than writing through a casted
        // uint32_t pointer (which would be a misaligned, type-punned store).
        const int32_t fourBytes = _mm_cvtsi128_si32(r);
        memcpy(dst, &fourBytes, sizeof(fourBytes));

        src += 4;
        dst += 4;
        count -= 4;
    }

    // Finish the remaining (fewer than 4) pixels one at a time.
    RGB1_to_R_portable(dst, src, count);
}
#else
// Fallback used when the SSSE3 path is not compiled in: forwards directly to
// RGB1_to_RGB_portable() with the same arguments.
inline void RGB1_to_RGB(uint8_t dst[], const uint32_t* src, int count) {
RGB1_to_RGB_portable(dst, src, count);
}
// Fallback used when the SSSE3 path is not compiled in: forwards directly to
// RGB1_to_R_portable() with the same arguments.
inline void RGB1_to_R(uint8_t dst[], const uint32_t* src, int count) {
RGB1_to_R_portable(dst, src, count);
}
#endif
} // namespace
#endif
|