// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK 0x1ff

/*
 * LBR Branch Select filter bits which, when set, suppress recording of
 * the corresponding type of branch
 */
#define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */
#define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC 2 /* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND 4 /* Near indirect calls */
#define LBR_SELECT_RET_NEAR 5 /* Near returns */
#define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH 8 /* Far branches */

#define LBR_KERNEL BIT(LBR_SELECT_KERNEL)
#define LBR_USER BIT(LBR_SELECT_USER)
#define LBR_JCC BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH)

#define LBR_NOT_SUPP -1 /* unsupported filter */
#define LBR_IGNORE 0

#define LBR_ANY \
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
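
/*
 * A single branch record as read back from an LBR From/To MSR pair. The
 * instruction pointer occupies the low 58 bits and must be sign-extended
 * to recover the full virtual address; the remaining bits carry the
 * misprediction flag (From) and the speculation/validity flags (To).
 */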
struct branch_entry {
union {
struct {
u64 ip:58;
u64 ip_sign_ext:5;
u64 mispredict:1;
} split;
u64 full;
} from;
union {
struct {
u64 ip:58;
u64 ip_sign_ext:3;
u64 reserved:1;
u64 spec:1;
u64 valid:1;
} split;
u64 full;
} to;
};
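
/*
 * The From and To halves of each branch record live in an interleaved MSR
 * range: entry idx uses MSR_AMD_SAMP_BR_FROM + idx * 2 for the source IP
 * and the immediately following MSR for the destination IP.
 */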
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}
static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}
static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
u64 val;
rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
return val;
}
static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
u64 val;
rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
return val;
}
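
/*
 * Sign-extend the recorded IP from the CPU's virtual address width to
 * recover the canonical virtual address
 */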
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
u32 shift = 64 - boot_cpu_data.x86_virt_bits;
return (u64)(((s64)ip << shift) >> shift);
}
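
/*
 * Apply the software branch filter to the records captured by
 * amd_pmu_lbr_read(): adjust the From address of fused branches, drop
 * entries whose type does not match the requested selection and compress
 * the remaining records so that the stack stays contiguous.
 */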
static void amd_pmu_lbr_filter(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int br_sel = cpuc->br_sel, offset, type, i, j;
bool compress = false;
bool fused_only = false;
u64 from, to;
/* If sampling all branches, there is nothing to filter */
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
fused_only = true;
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
from = cpuc->lbr_entries[i].from;
to = cpuc->lbr_entries[i].to;
type = branch_type_fused(from, to, 0, &offset);
/*
* Adjust the branch from address in case of instruction
* fusion where it points to an instruction preceding the
* actual branch
*/
if (offset) {
cpuc->lbr_entries[i].from += offset;
if (fused_only)
continue;
}
/* If type does not correspond, then discard */
if (type == X86_BR_NONE || (br_sel & type) != type) {
cpuc->lbr_entries[i].from = 0; /* mark invalid */
compress = true;
}
if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
cpuc->lbr_entries[i].type = common_branch_type(type);
}
if (!compress)
return;
/* Remove all invalid entries */
for (i = 0; i < cpuc->lbr_stack.nr; ) {
if (!cpuc->lbr_entries[i].from) {
j = i;
while (++j < cpuc->lbr_stack.nr)
cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
cpuc->lbr_stack.nr--;
if (!cpuc->lbr_entries[i].from)
continue;
}
i++;
}
}
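
/*
 * Map the (valid << 1 | spec) encoding of a To record onto the perf
 * branch speculation outcome reported to userspace
 */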
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
PERF_BR_SPEC_NA,
PERF_BR_SPEC_WRONG_PATH,
PERF_BR_NON_SPEC_CORRECT_PATH,
PERF_BR_SPEC_CORRECT_PATH,
};
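
/*
 * Read out all hardware branch records, skip empty slots and convert the
 * rest into perf_branch_entry format before handing them to the software
 * filter
 */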
void amd_pmu_lbr_read(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_branch_entry *br = cpuc->lbr_entries;
struct branch_entry entry;
int out = 0, idx, i;
if (!cpuc->lbr_users)
return;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
entry.from.full = amd_pmu_lbr_get_from(i);
entry.to.full = amd_pmu_lbr_get_to(i);
/*
* Check if a branch has been logged; if valid = 0, spec = 0
* then no branch was recorded
*/
if (!entry.to.split.valid && !entry.to.split.spec)
continue;
perf_clear_branch_entry_bitfields(br + out);
br[out].from = sign_ext_branch_ip(entry.from.split.ip);
br[out].to = sign_ext_branch_ip(entry.to.split.ip);
br[out].mispred = entry.from.split.mispredict;
br[out].predicted = !br[out].mispred;
/*
* Set branch speculation information using the status of
* the valid and spec bits.
*
* When valid = 0, spec = 0, no branch was recorded and the
* entry is discarded as seen above.
*
* When valid = 0, spec = 1, the recorded branch was
* speculative but took the wrong path.
*
* When valid = 1, spec = 0, the recorded branch was
* non-speculative but took the correct path.
*
* When valid = 1, spec = 1, the recorded branch was
* speculative and took the correct path
*/
idx = (entry.to.split.valid << 1) | entry.to.split.spec;
br[out].spec = lbr_spec_map[idx];
out++;
}
cpuc->lbr_stack.nr = out;
	/*
	 * Internal register renaming ensures that LBR From[0] and
	 * LBR To[0] always represent the TOS
	 */
cpuc->lbr_stack.hw_idx = 0;
/* Perform further software filtering */
amd_pmu_lbr_filter();
}
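
/*
 * Translation of perf branch sample type bits into LBR Branch Select
 * filter bits. LBR_NOT_SUPP marks filters that the hardware cannot
 * provide; LBR_IGNORE marks filters that are silently accepted.
 */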
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE,
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP,
[PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP,
[PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP,
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP,
[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP,
};
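
/*
 * Build both halves of the branch filter: a software mask of X86_BR_*
 * bits used by amd_pmu_lbr_filter() and the hardware LBR Branch Select
 * value. The latter is inverted because its bits operate in suppress
 * mode, i.e. a set bit disables recording of that branch type.
 */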
static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
struct hw_perf_event_extra *reg = &event->hw.branch_reg;
u64 br_type = event->attr.branch_sample_type;
u64 mask = 0, v;
int i;
/* No LBR support */
if (!x86_pmu.lbr_nr)
return -EOPNOTSUPP;
if (br_type & PERF_SAMPLE_BRANCH_USER)
mask |= X86_BR_USER;
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
mask |= X86_BR_KERNEL;
/* Ignore BRANCH_HV here */
if (br_type & PERF_SAMPLE_BRANCH_ANY)
mask |= X86_BR_ANY;
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
mask |= X86_BR_ANY_CALL;
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
mask |= X86_BR_IND_CALL;
if (br_type & PERF_SAMPLE_BRANCH_COND)
mask |= X86_BR_JCC;
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
mask |= X86_BR_IND_JMP;
if (br_type & PERF_SAMPLE_BRANCH_CALL)
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
mask |= X86_BR_TYPE_SAVE;
reg->reg = mask;
mask = 0;
for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
if (!(br_type & BIT_ULL(i)))
continue;
v = lbr_select_map[i];
if (v == LBR_NOT_SUPP)
return -EOPNOTSUPP;
if (v != LBR_IGNORE)
mask |= v;
}
/* Filter bits operate in suppress mode */
reg->config = mask ^ LBR_SELECT_MASK;
return 0;
}
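
/* Validate and set up branch filtering for a sampling event */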
int amd_pmu_lbr_hw_config(struct perf_event *event)
{
int ret = 0;
/* LBR is not recommended in counting mode */
if (!is_sampling_event(event))
return -EINVAL;
ret = amd_pmu_lbr_setup_filter(event);
if (!ret)
event->attach_state |= PERF_ATTACH_SCHED_CB;
return ret;
}
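
/* Wipe all branch records and clear the hardware branch filter */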
void amd_pmu_lbr_reset(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
if (!x86_pmu.lbr_nr)
return;
/* Reset all branch records individually */
for (i = 0; i < x86_pmu.lbr_nr; i++) {
amd_pmu_lbr_set_from(i, 0);
amd_pmu_lbr_set_to(i, 0);
}
cpuc->last_task_ctx = NULL;
cpuc->last_log_id = 0;
wrmsrl(MSR_AMD64_LBR_SELECT, 0);
}
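
/*
 * Account a new LBR user on this CPU. The branch records are reset when
 * the first user arrives and the event has not accumulated any runtime
 * yet, so that records from before the event started are not reported.
 */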
void amd_pmu_lbr_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event_extra *reg = &event->hw.branch_reg;
if (!x86_pmu.lbr_nr)
return;
if (has_branch_stack(event)) {
cpuc->lbr_select = 1;
cpuc->lbr_sel->config = reg->config;
cpuc->br_sel = reg->reg;
}
perf_sched_cb_inc(event->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
amd_pmu_lbr_reset();
}
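
/*
 * Drop an LBR user from this CPU and clear the filter selection if the
 * event used a branch stack
 */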
void amd_pmu_lbr_del(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu.lbr_nr)
return;
if (has_branch_stack(event))
cpuc->lbr_select = 0;
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
perf_sched_cb_dec(event->pmu);
}
void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* A context switch can flip the address space and LBR entries are
* not tagged with an identifier. Hence, branches cannot be resolved
* from the old address space and the LBR records should be wiped.
*/
if (cpuc->lbr_users && sched_in)
amd_pmu_lbr_reset();
}
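
/*
 * Program the hardware branch filter if one was requested and turn on
 * branch recording: freeze the LBRs on PMI via DebugCtl and set the
 * LBR v2 enable bit in the debug extension control register.
 */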
void amd_pmu_lbr_enable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 lbr_select, dbg_ctl, dbg_extn_cfg;
if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
return;
/* Set hardware branch filter */
if (cpuc->lbr_select) {
lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
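
/* Turn off branch recording and the freeze-on-PMI behavior */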
void amd_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 dbg_ctl, dbg_extn_cfg;
if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
}
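
/*
 * Detect LBR v2 support and read the depth of the branch record stack
 * from the extended performance monitoring CPUID leaf
 */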
__init int amd_pmu_lbr_init(void)
{
union cpuid_0x80000022_ebx ebx;
if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
return -EOPNOTSUPP;
/* Set number of entries */
ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
return 0;
}