summaryrefslogtreecommitdiffstats
path: root/gfx/skia/skia/src/core/SkOpts_erms.cpp
blob: 4e1e096d7d9fac6aedcbd38684b47488ec9167f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Copyright 2020 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/base/SkMSAN.h"
#include "src/core/SkOpts.h"

#if defined(__x86_64__) || defined(_M_X64)  // memset16 and memset32 could work on 32-bit x86 too.

    // Explanation string passed as the last argument of sk_msan_mark_initialized()
    // by every repsto() overload in this file.
    static const char* note = "MSAN can't see that rep sto initializes memory.";

    #if defined(_MSC_VER)
        #include <intrin.h>
        // Fill dst[0..n) with the 16-bit value v via the MSVC __stosw intrinsic.
        // The destination range is reported to MSAN as initialized before the store
        // is issued (see `note` for the rationale).
        static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
            uint16_t* const end = dst + n;
            sk_msan_mark_initialized(dst, end, note);
            __stosw(dst, v, n);
        }
        // Fill dst[0..n) with the 32-bit value v via the MSVC __stosd intrinsic.
        // __stosd is called through an unsigned long*, so the size equality between
        // unsigned long and uint32_t is checked at compile time before reinterpreting
        // the destination pointer.
        static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
            static_assert(sizeof(uint32_t) == sizeof(unsigned long));
            uint32_t* const end = dst + n;
            sk_msan_mark_initialized(dst, end, note);
            unsigned long* const out = reinterpret_cast<unsigned long*>(dst);
            __stosd(out, v, n);
        }
        // Fill dst[0..n) with the 64-bit value v via the MSVC __stosq intrinsic.
        // The destination range is reported to MSAN as initialized before the store
        // is issued (see `note` for the rationale).
        static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
            uint64_t* const end = dst + n;
            sk_msan_mark_initialized(dst, end, note);
            __stosq(dst, v, n);
        }
    #else
        // Fill dst[0..n) with the 16-bit value v using `rep stosw`.
        static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
            // Mark the range initialized first: the asm below updates dst and n in place,
            // and (per `note`) MSAN cannot observe the stores it performs.
            sk_msan_mark_initialized(dst,dst+n,note);
            // "+D"/"+c": dst and n are read-write operands (destination pointer and count).
            // "a": v is the value stored.  "memory": the asm writes memory the compiler
            // cannot otherwise see.
            asm volatile("rep stosw" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
        }
        // Fill dst[0..n) with the 32-bit value v using `rep stosl`.
        static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
            // Mark the range initialized first: the asm below updates dst and n in place,
            // and (per `note`) MSAN cannot observe the stores it performs.
            sk_msan_mark_initialized(dst,dst+n,note);
            // "+D"/"+c": dst and n are read-write operands (destination pointer and count).
            // "a": v is the value stored.  "memory": the asm writes memory the compiler
            // cannot otherwise see.
            asm volatile("rep stosl" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
        }
        // Fill dst[0..n) with the 64-bit value v using `rep stosq`.
        static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
            // Mark the range initialized first: the asm below updates dst and n in place,
            // and (per `note`) MSAN cannot observe the stores it performs.
            sk_msan_mark_initialized(dst,dst+n,note);
            // "+D"/"+c": dst and n are read-write operands (destination pointer and count).
            // "a": v is the value stored.  "memory": the asm writes memory the compiler
            // cannot otherwise see.
            asm volatile("rep stosq" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
        }
    #endif

    // ERMS is ideal for large copies but has a relatively high setup cost,
    // so we use the previous best routine for small inputs.  FSRM would make this moot.
    // Pointer shapes of the linear and rectangular fill routines, parameterized
    // on the element type.
    template <typename T> using PrevFillFn     = void (*)(T*, T, int);
    template <typename T> using PrevRectFillFn = void (*)(T*, T, int, size_t, int);

    // Routines that were installed in SkOpts before Init_erms() ran.  Init_erms()
    // fills these in; the erms wrappers call them for inputs that small() accepts.
    static PrevFillFn<uint16_t>     g_memset16_prev;
    static PrevFillFn<uint32_t>     g_memset32_prev;
    static PrevFillFn<uint64_t>     g_memset64_prev;
    static PrevRectFillFn<uint16_t> g_rect_memset16_prev;
    static PrevRectFillFn<uint32_t> g_rect_memset32_prev;
    static PrevRectFillFn<uint64_t> g_rect_memset64_prev;

    // Returns true when a fill of `bytes` bytes should go to the previous routine
    // rather than rep sto.  The cutoff was empirically determined with
    // `nanobench -m memset`.
    static bool small(size_t bytes) {
        constexpr size_t kRepStoMinBytes = 1024;
        return !(bytes >= kRepStoMinBytes);
    }

    #define SK_OPTS_NS erms
    // Fill routines that use rep sto for large inputs and hand anything small()
    // accepts to the routines saved in the g_*_prev pointers.
    //
    // NOTE(review): `sizeof(v)*n` converts the int count to size_t, so these
    // wrappers assume n >= 0 -- confirm against callers.
    namespace SK_OPTS_NS {
        // Fill dst[0..n) with v.
        static inline void memset16(uint16_t* dst, uint16_t v, int n) {
            return small(sizeof(v)*n) ? g_memset16_prev(dst, v, n)
                                      : repsto(dst, v, n);
        }
        // Fill dst[0..n) with v.
        static inline void memset32(uint32_t* dst, uint32_t v, int n) {
            return small(sizeof(v)*n) ? g_memset32_prev(dst, v, n)
                                      : repsto(dst, v, n);
        }
        // Fill dst[0..n) with v.
        static inline void memset64(uint64_t* dst, uint64_t v, int n) {
            return small(sizeof(v)*n) ? g_memset64_prev(dst, v, n)
                                      : repsto(dst, v, n);
        }

        // Fill `height` rows of n elements each with v.  Consecutive rows start
        // rowBytes/sizeof(v) elements apart (the division rounds down).
        static inline void rect_memset16(uint16_t* dst, uint16_t v, int n,
                                         size_t rowBytes, int height) {
            if (small(sizeof(v)*n)) {
                return g_rect_memset16_prev(dst,v,n, rowBytes,height);
            }
            // Keep the stride as size_t: narrowing it to int would truncate (or
            // turn negative) for very large rowBytes and move dst to the wrong row.
            const size_t stride = rowBytes/sizeof(v);
            while (height --> 0) {
                repsto(dst, v, n);
                dst += stride;
            }
        }
        // Fill `height` rows of n elements each with v.  Consecutive rows start
        // rowBytes/sizeof(v) elements apart (the division rounds down).
        static inline void rect_memset32(uint32_t* dst, uint32_t v, int n,
                                         size_t rowBytes, int height) {
            if (small(sizeof(v)*n)) {
                return g_rect_memset32_prev(dst,v,n, rowBytes,height);
            }
            // Keep the stride as size_t: narrowing it to int would truncate (or
            // turn negative) for very large rowBytes and move dst to the wrong row.
            const size_t stride = rowBytes/sizeof(v);
            while (height --> 0) {
                repsto(dst, v, n);
                dst += stride;
            }
        }
        // Fill `height` rows of n elements each with v.  Consecutive rows start
        // rowBytes/sizeof(v) elements apart (the division rounds down).
        static inline void rect_memset64(uint64_t* dst, uint64_t v, int n,
                                         size_t rowBytes, int height) {
            if (small(sizeof(v)*n)) {
                return g_rect_memset64_prev(dst,v,n, rowBytes,height);
            }
            // Keep the stride as size_t: narrowing it to int would truncate (or
            // turn negative) for very large rowBytes and move dst to the wrong row.
            const size_t stride = rowBytes/sizeof(v);
            while (height --> 0) {
                repsto(dst, v, n);
                dst += stride;
            }
        }
    }  // namespace SK_OPTS_NS

    namespace SkOpts {
        // Installs the erms fill routines into the SkOpts function pointers, saving
        // whatever was installed before in the g_*_prev pointers so the erms routines
        // can delegate small inputs to it.
        //
        // Each slot is swapped only if it does not already hold the erms routine.
        // Without that check a second call would save the erms routine as its own
        // "previous" implementation, and every small input would recurse forever.
        void Init_erms() {
            if (memset16 != SK_OPTS_NS::memset16) {
                g_memset16_prev = memset16;
                memset16        = SK_OPTS_NS::memset16;
            }
            if (memset32 != SK_OPTS_NS::memset32) {
                g_memset32_prev = memset32;
                memset32        = SK_OPTS_NS::memset32;
            }
            if (memset64 != SK_OPTS_NS::memset64) {
                g_memset64_prev = memset64;
                memset64        = SK_OPTS_NS::memset64;
            }
            if (rect_memset16 != SK_OPTS_NS::rect_memset16) {
                g_rect_memset16_prev = rect_memset16;
                rect_memset16        = SK_OPTS_NS::rect_memset16;
            }
            if (rect_memset32 != SK_OPTS_NS::rect_memset32) {
                g_rect_memset32_prev = rect_memset32;
                rect_memset32        = SK_OPTS_NS::rect_memset32;
            }
            if (rect_memset64 != SK_OPTS_NS::rect_memset64) {
                g_rect_memset64_prev = rect_memset64;
                rect_memset64        = SK_OPTS_NS::rect_memset64;
            }
        }
    }
#else
    namespace SkOpts {
        // Builds that are not x86-64 compile none of the rep sto routines, so
        // there is nothing to install here.
        void Init_erms() {}
    }
#endif