1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
#ifndef MY_CPU_INCLUDED
#define MY_CPU_INCLUDED
/* Copyright (c) 2013, 2020, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
/* instructions for specific cpu's */
/*
Macros for adjusting thread priority (hardware multi-threading)
The defines are the same ones used by the linux kernel
*/
#ifdef _ARCH_PWR8
#include <sys/platform/ppc.h>
/* Very low priority */
#define HMT_very_low() __ppc_set_ppr_very_low()
/* Low priority */
#define HMT_low() __ppc_set_ppr_low()
/* Medium low priority */
#define HMT_medium_low() __ppc_set_ppr_med_low()
/* Medium priority */
#define HMT_medium() __ppc_set_ppr_med()
/* Medium high priority */
#define HMT_medium_high() __ppc_set_ppr_med_high()
/* High priority */
#define HMT_high() asm volatile("or 3,3,3")
#else
#define HMT_very_low()
#define HMT_low()
#define HMT_medium_low()
#define HMT_medium()
#define HMT_medium_high()
#define HMT_high()
#endif
#if defined __i386__ || defined __x86_64__ || defined _WIN32
# define HAVE_PAUSE_INSTRUCTION /* added in Intel Pentium 4 */
#endif
#ifdef _WIN32
#elif defined HAVE_PAUSE_INSTRUCTION
#elif defined(_ARCH_PWR8)
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
#else
# include "my_global.h"
# include "my_atomic.h"
#endif
static inline void MY_RELAX_CPU(void)
{
#ifdef _WIN32
/*
In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
independent way by using YieldProcessor.
*/
YieldProcessor();
#elif defined HAVE_PAUSE_INSTRUCTION
/*
According to the gcc info page, asm volatile means that the
instruction has important side-effects and must not be removed.
Also asm volatile may trigger a memory barrier (spilling all registers
to memory).
*/
#ifdef __SUNPRO_CC
asm ("pause" );
#else
__asm__ __volatile__ ("pause");
#endif
#elif defined(_ARCH_PWR8)
__ppc_get_timebase();
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
/* Mainly, prevent the compiler from optimizing away delay loops */
#ifdef _aarch64_
__asm__ __volatile__ ("isb":::"memory");
#else
/*
some older 32 bits processor doesn't support isb but as per
arm-v8 reference manual all armv8 processor should support isb.
*/
__asm__ __volatile__ ("":::"memory");
#endif
#else
int32 var, oldval = 0;
my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
MY_MEMORY_ORDER_RELAXED);
#endif
}
#ifdef HAVE_PAUSE_INSTRUCTION
# ifdef __cplusplus
extern "C" {
# endif
extern unsigned my_cpu_relax_multiplier;
void my_cpu_init(void);
# ifdef __cplusplus
}
# endif
#else
# define my_cpu_relax_multiplier 200
# define my_cpu_init() /* nothing */
#endif
/*
LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
recommends to use it in spin loops also on non-HT machines to reduce power
consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
and YieldProcessor shows that much better performance is achieved by calling
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
loop count in the range 200-300 brought best results.
*/
static inline int LF_BACKOFF(void)
{
unsigned i= my_cpu_relax_multiplier;
while (i--)
MY_RELAX_CPU();
return 1;
}
/**
Run a delay loop while waiting for a shared resource to be released.
@param delay originally, roughly microseconds on 100 MHz Intel Pentium
*/
static inline void ut_delay(unsigned delay)
{
unsigned i= my_cpu_relax_multiplier / 4 * delay;
HMT_low();
while (i--)
MY_RELAX_CPU();
HMT_medium();
}
#endif
|