1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
|
/* Copyright (c) 2008 MySQL AB, 2009 Sun Microsystems, Inc.
Copyright (c) 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
/*
rdtsc3 -- multi-platform timer code
pgulutzan@mysql.com, 2005-08-29
modified 2008-11-02
*/
#ifndef MY_RDTSC_H
#define MY_RDTSC_H
# ifndef __has_builtin
# define __has_builtin(x) 0 /* Compatibility with non-clang compilers */
# endif
# if __has_builtin(__builtin_readcyclecounter)
# elif defined _WIN32
# include <intrin.h>
# elif defined __i386__ || defined __x86_64__
# include <x86intrin.h>
# elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
# include <ia64intrin.h>
# elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
# include <sys/times.h>
# endif
/**
Characteristics of a timer.
*/
struct my_timer_unit_info
{
/** Routine used for the timer. */
ulonglong routine;
/** Overhead of the timer. */
ulonglong overhead;
/** Frequency of the timer. */
ulonglong frequency;
/** Resolution of the timer. */
ulonglong resolution;
};
/**
Characteristics of all the supported timers.
@sa my_timer_init().
*/
struct my_timer_info
{
/** Characteristics of the cycle timer. */
struct my_timer_unit_info cycles;
/** Characteristics of the nanosecond timer. */
struct my_timer_unit_info nanoseconds;
/** Characteristics of the microsecond timer. */
struct my_timer_unit_info microseconds;
/** Characteristics of the millisecond timer. */
struct my_timer_unit_info milliseconds;
/** Characteristics of the tick timer. */
struct my_timer_unit_info ticks;
};
typedef struct my_timer_info MY_TIMER_INFO;
#define MY_TIMER_ROUTINE_RDTSC 5
#define MY_TIMER_ROUTINE_ASM_IA64 6
#define MY_TIMER_ROUTINE_PPC_GET_TIMEBASE 7
#define MY_TIMER_ROUTINE_GETHRTIME 9
#define MY_TIMER_ROUTINE_READ_REAL_TIME 10
#define MY_TIMER_ROUTINE_CLOCK_GETTIME 11
#define MY_TIMER_ROUTINE_GETTIMEOFDAY 13
#define MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER 14
#define MY_TIMER_ROUTINE_GETTICKCOUNT 15
#define MY_TIMER_ROUTINE_TIME 16
#define MY_TIMER_ROUTINE_TIMES 17
#define MY_TIMER_ROUTINE_FTIME 18
#define MY_TIMER_ROUTINE_ASM_GCC_SPARC64 23
#define MY_TIMER_ROUTINE_ASM_GCC_SPARC32 24
#define MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME 25
#define MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME 26
#define MY_TIMER_ROUTINE_ASM_S390 28
#define MY_TIMER_ROUTINE_AARCH64 29
#define MY_TIMER_ROUTINE_RISCV 30
C_MODE_START
/**
A cycle timer.
On clang we use __builtin_readcyclecounter(), except for AARCH64.
On other compilers:
On IA-32 and AMD64, we use the RDTSC instruction.
On IA-64, we read the ar.itc register.
On SPARC, we read the tick register.
On POWER, we read the Time Base Register (which is not really a cycle count
but a separate counter with less than nanosecond resolution).
On IBM S/390 System z we use the STCK instruction.
On ARM, we probably should use the Generic Timer, but should figure out
how to ensure that it can be accessed.
On AARCH64, we use the generic timer base register. We override clang
implementation for aarch64 as it access a PMU register which is not
guaranteed to be active.
On RISC-V, we use the rdcycle instruction to read from mcycle register.
Sadly, we have nothing for the Digital Alpha, MIPS, Motorola m68k,
HP PA-RISC or other non-mainstream (or obsolete) processors.
TODO: consider C++11 std::chrono::high_resolution_clock.
We fall back to gethrtime() where available.
On the platforms that do not have a CYCLE timer,
"wait" events are initialized to use NANOSECOND instead of CYCLE
during performance_schema initialization (at the server startup).
Linux performance monitor (see "man perf_event_open") can
provide cycle counter on the platforms that do not have
other kinds of cycle counters. But we don't use it so far.
ARM notes
---------
Userspace high precision timing on CNTVCT_EL0 requires that CNTKCTL_EL1
is set to 1 for each CPU in privileged mode.
During tests on ARMv7 Debian, perf_even_open() based cycle counter provided
too low frequency with too high overhead:
MariaDB [performance_schema]> SELECT * FROM performance_timers;
+-------------+-----------------+------------------+----------------+
| TIMER_NAME | TIMER_FREQUENCY | TIMER_RESOLUTION | TIMER_OVERHEAD |
+-------------+-----------------+------------------+----------------+
| CYCLE | 689368159 | 1 | 970 |
| NANOSECOND | 1000000000 | 1 | 308 |
| MICROSECOND | 1000000 | 1 | 417 |
| MILLISECOND | 1000 | 1000 | 407 |
| TICK | 127 | 1 | 612 |
+-------------+-----------------+------------------+----------------+
Therefore, it was decided not to use perf_even_open() on ARM
(i.e. go without CYCLE and have "wait" events use NANOSECOND by default).
@return the current timer value, in cycles.
*/
static inline ulonglong my_timer_cycles(void)
{
# if __has_builtin(__builtin_readcyclecounter) && !defined (__aarch64__)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_AARCH64
return __builtin_readcyclecounter();
# elif defined _M_IX86 || defined _M_X64 || defined __i386__ || defined __x86_64__
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_RDTSC
return __rdtsc();
#elif defined _M_ARM64
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_AARCH64
return _ReadStatusReg(ARM64_CNTVCT);
# elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_ASM_IA64
return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
#elif defined(__GNUC__) && defined(__ia64__)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_ASM_IA64
{
ulonglong result;
__asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
return result;
}
#elif defined __GNUC__ && defined __powerpc__
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_PPC_GET_TIMEBASE
return __builtin_ppc_get_timebase();
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_ASM_GCC_SPARC64
{
ulonglong result;
__asm __volatile__ ("rd %%tick,%0" : "=r" (result));
return result;
}
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_ASM_GCC_SPARC32
{
union {
ulonglong wholeresult;
struct {
ulong high;
ulong low;
} splitresult;
} result;
__asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
return result.wholeresult;
}
#elif defined(__GNUC__) && defined(__s390__)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_ASM_S390
/* covers both s390 and s390x */
{
ulonglong result;
__asm__ __volatile__ ("stck %0" : "=Q" (result) : : "cc");
return result;
}
#elif defined(__GNUC__) && defined (__aarch64__)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_AARCH64
{
ulonglong result;
__asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (result));
return result;
}
#elif defined(__riscv)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_RISCV
/* Use RDCYCLE (and RDCYCLEH on riscv32) */
{
# if __riscv_xlen == 32
ulong result_lo, result_hi0, result_hi1;
/* Implemented in assembly because Clang insisted on branching. */
__asm __volatile__(
"rdcycleh %0\n"
"rdcycle %1\n"
"rdcycleh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
"and %1, %1, %0\n"
: "=r"(result_hi0), "=r"(result_lo), "=r"(result_hi1));
return (static_cast<ulonglong>(result_hi1) << 32) | result_lo;
# else
ulonglong result;
__asm __volatile__("rdcycle %0" : "=r"(result));
return result;
}
# endif
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
#define MY_TIMER_ROUTINE_CYCLES MY_TIMER_ROUTINE_GETHRTIME
/* gethrtime may appear as either cycle or nanosecond counter */
return (ulonglong) gethrtime();
#else
#define MY_TIMER_ROUTINE_CYCLES 0
return 0;
#endif
}
/**
A nanosecond timer.
@return the current timer value, in nanoseconds.
*/
ulonglong my_timer_nanoseconds(void);
/**
A microseconds timer.
@return the current timer value, in microseconds.
*/
ulonglong my_timer_microseconds(void);
/**
A millisecond timer.
@return the current timer value, in milliseconds.
*/
ulonglong my_timer_milliseconds(void);
/**
A ticks timer.
@return the current timer value, in ticks.
*/
ulonglong my_timer_ticks(void);
/**
Timer initialization function.
@param [out] mti the timer characteristics.
*/
void my_timer_init(MY_TIMER_INFO *mti);
C_MODE_END
#endif
|