Diffstat (limited to 'tools/perf/arch/x86/util')
25 files changed, 4200 insertions, 0 deletions
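The files below wire x86 CPU-vendor detection through several layers: cpuid.h and header.c build a CPUID identification string of the form "vendor,family,model,stepping", and auxtrace.c only sets up Intel PT/BTS support when that string starts with "GenuineIntel,". As a rough, standalone illustration of that flow (not part of the commit; it uses the compiler's <cpuid.h> __get_cpuid() helper rather than the inline-asm cpuid() added in cpuid.h), the vendor check boils down to:

/*
 * Standalone sketch, not from the diff: reproduce the vendor-string layout
 * that get_cpuid_0() uses (EBX, then EDX, then ECX from CPUID leaf 0) and
 * the "GenuineIntel" check that auxtrace_record__init() performs on the
 * front of the CPUID string.
 */
#include <stdio.h>
#include <string.h>
#include <cpuid.h>	/* GCC/clang builtin __get_cpuid() */

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Same byte order as get_cpuid_0(): EBX, EDX, ECX */
	memcpy(vendor + 0, &ebx, 4);
	memcpy(vendor + 4, &edx, 4);
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';

	/* auxtrace_record__init() only wires up Intel PT/BTS on Intel CPUs */
	printf("%s -> %s\n", vendor,
	       strncmp(vendor, "GenuineIntel", 12) == 0 ?
	       "Intel PT/BTS auxtrace applies" : "no x86 auxtrace support");
	return 0;
}

The same EBX/EDX/ECX byte order is what makes the 12-byte vendor string come out as "GenuineIntel" or "AuthenticAMD"; env.c's x86__is_amd_cpu() and the AMD IBS handling in evsel.c key off the same prefix.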
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
new file mode 100644
index 000000000..93c3ee5f9
--- /dev/null
+++ b/tools/perf/arch/x86/util/Build
@@ -0,0 +1,24 @@
+perf-y += header.o
+perf-y += tsc.o
+perf-y += pmu.o
+perf-y += kvm-stat.o
+perf-y += perf_regs.o
+perf-y += topdown.o
+perf-y += machine.o
+perf-y += event.o
+perf-y += evlist.o
+perf-y += mem-events.o
+perf-y += evsel.o
+perf-y += iostat.o
+perf-y += env.o
+
+perf-$(CONFIG_DWARF) += dwarf-regs.o
+perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
+
+perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+perf-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-$(CONFIG_AUXTRACE) += archinsn.o
+perf-$(CONFIG_AUXTRACE) += intel-pt.o
+perf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
new file mode 100644
index 000000000..546feda08
--- /dev/null
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "archinsn.h"
+#include "event.h"
+#include "machine.h"
+#include "thread.h"
+#include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
+
+void arch_fetch_insn(struct perf_sample *sample,
+		     struct thread *thread,
+		     struct machine *machine)
+{
+	struct insn insn;
+	int len, ret;
+	bool is64bit = false;
+
+	if (!sample->ip)
+		return;
+	len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit);
+	if (len <= 0)
+		return;
+
+	ret = insn_decode(&insn, sample->insn, len,
+			  is64bit ? INSN_MODE_64 : INSN_MODE_32);
+	if (ret >= 0 && insn.length <= len)
+		sample->insn_len = insn.length;
+}
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
new file mode 100644
index 000000000..3da506e13
--- /dev/null
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * auxtrace.c: AUX area tracing support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ */ + +#include <errno.h> +#include <stdbool.h> + +#include "../../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/pmu.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/evlist.h" + +static +struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, + int *err) +{ + struct perf_pmu *intel_pt_pmu; + struct perf_pmu *intel_bts_pmu; + struct evsel *evsel; + bool found_pt = false; + bool found_bts = false; + + intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + if (intel_pt_pmu) + intel_pt_pmu->auxtrace = true; + intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + if (intel_bts_pmu) + intel_bts_pmu->auxtrace = true; + + evlist__for_each_entry(evlist, evsel) { + if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type) + found_pt = true; + if (intel_bts_pmu && evsel->core.attr.type == intel_bts_pmu->type) + found_bts = true; + } + + if (found_pt && found_bts) { + pr_err("intel_pt and intel_bts may not be used together\n"); + *err = -EINVAL; + return NULL; + } + + if (found_pt) + return intel_pt_recording_init(err); + + if (found_bts) + return intel_bts_recording_init(err); + + return NULL; +} + +struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, + int *err) +{ + char buffer[64]; + int ret; + + *err = 0; + + ret = get_cpuid(buffer, sizeof(buffer)); + if (ret) { + *err = ret; + return NULL; + } + + if (!strncmp(buffer, "GenuineIntel,", 13)) + return auxtrace_record__init_intel(evlist, err); + + return NULL; +} diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h new file mode 100644 index 000000000..0a3ae0ace --- /dev/null +++ b/tools/perf/arch/x86/util/cpuid.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef PERF_CPUID_H +#define PERF_CPUID_H 1 + + +static inline void +cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b, + unsigned int *c, unsigned int *d) +{ + /* + * Preserve %ebx/%rbx register by either placing it in %rdi or saving it + * on the stack - x86-64 needs to avoid the stack red zone. In PIC + * compilations %ebx contains the address of the global offset + * table. %rbx is occasionally used to address stack variables in + * presence of dynamic allocas. + */ + asm( +#if defined(__x86_64__) + "mov %%rbx, %%rdi\n" + "cpuid\n" + "xchg %%rdi, %%rbx\n" +#else + "pushl %%ebx\n" + "cpuid\n" + "movl %%ebx, %%edi\n" + "popl %%ebx\n" +#endif + : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d) + : "a"(op), "2"(op2)); +} + +void get_cpuid_0(char *vendor, unsigned int *lvl); + +#endif diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c new file mode 100644 index 000000000..530934805 --- /dev/null +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * Extracted from probe-finder.c + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <stddef.h> +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/ptrace.h> /* for struct pt_regs */ +#include <linux/kernel.h> /* for offsetof */ +#include <dwarf-regs.h> + +/* + * See arch/x86/kernel/ptrace.c. 
+ * Different from it: + * + * - Since struct pt_regs is defined differently for user and kernel, + * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct + * field name of user's pt_regs), we make REG_OFFSET_NAME to accept + * both string name and reg field name. + * + * - Since accessing x86_32's pt_regs from x86_64 building is difficult + * and vise versa, we simply fill offset with -1, so + * get_arch_regstr() still works but regs_query_register_offset() + * returns error. + * The only inconvenience caused by it now is that we are not allowed + * to generate BPF prologue for a x86_64 kernel if perf is built for + * x86_32. This is really a rare usecase. + * + * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use + * the order defined by dwarf. + */ + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +#ifdef __x86_64__ +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} +#else +# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} +# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} +#endif + +/* TODO: switching by dwarf address size */ +#ifndef __x86_64__ +static const struct pt_regs_offset x86_32_regoffset_table[] = { + REG_OFFSET_NAME_32("%ax", eax), + REG_OFFSET_NAME_32("%cx", ecx), + REG_OFFSET_NAME_32("%dx", edx), + REG_OFFSET_NAME_32("%bx", ebx), + REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ + REG_OFFSET_NAME_32("%bp", ebp), + REG_OFFSET_NAME_32("%si", esi), + REG_OFFSET_NAME_32("%di", edi), + REG_OFFSET_END, +}; + +#define regoffset_table x86_32_regoffset_table +#else +static const struct pt_regs_offset x86_64_regoffset_table[] = { + REG_OFFSET_NAME_64("%ax", rax), + REG_OFFSET_NAME_64("%dx", rdx), + REG_OFFSET_NAME_64("%cx", rcx), + REG_OFFSET_NAME_64("%bx", rbx), + REG_OFFSET_NAME_64("%si", rsi), + REG_OFFSET_NAME_64("%di", rdi), + REG_OFFSET_NAME_64("%bp", rbp), + REG_OFFSET_NAME_64("%sp", rsp), + REG_OFFSET_NAME_64("%r8", r8), + REG_OFFSET_NAME_64("%r9", r9), + REG_OFFSET_NAME_64("%r10", r10), + REG_OFFSET_NAME_64("%r11", r11), + REG_OFFSET_NAME_64("%r12", r12), + REG_OFFSET_NAME_64("%r13", r13), + REG_OFFSET_NAME_64("%r14", r14), + REG_OFFSET_NAME_64("%r15", r15), + REG_OFFSET_END, +}; + +#define regoffset_table x86_64_regoffset_table +#endif + +/* Minus 1 for the ending REG_OFFSET_END */ +#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) + +/* Return architecture dependent register string (for kprobe-tracer) */ +const char *get_arch_regstr(unsigned int n) +{ + return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; +} + +/* Reuse code from arch/x86/kernel/ptrace.c */ +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. 
If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c new file mode 100644 index 000000000..3e537ffb1 --- /dev/null +++ b/tools/perf/arch/x86/util/env.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "linux/string.h" +#include "util/env.h" +#include "env.h" + +bool x86__is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_amd) + goto ret; + + perf_env__cpuid(&env); + is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1; + perf_env__exit(&env); +ret: + return is_amd >= 1 ? true : false; +} diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h new file mode 100644 index 000000000..d78f080b6 --- /dev/null +++ b/tools/perf/arch/x86/util/env.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_ENV_H +#define _X86_ENV_H + +bool x86__is_amd_cpu(void); + +#endif /* _X86_ENV_H */ diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c new file mode 100644 index 000000000..e670f3547 --- /dev/null +++ b/tools/perf/arch/x86/util/event.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" + +#if defined(__x86_64__) + +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int rc = 0; + struct map *pos; + struct maps *kmaps = machine__kernel_maps(machine); + union perf_event *event = zalloc(sizeof(event->mmap) + + machine->id_hdr_size); + + if (!event) { + pr_debug("Not enough memory synthesizing mmap event " + "for extra kernel maps\n"); + return -1; + } + + maps__for_each_entry(kmaps, pos) { + struct kmap *kmap; + size_t size; + + if (!__map__is_extra_kernel_map(pos)) + continue; + + kmap = map__kmap(pos); + + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + + machine->id_hdr_size; + + memset(event, 0, size); + + event->mmap.header.type = PERF_RECORD_MMAP; + + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + + event->mmap.header.size = size; + + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pgoff = pos->pgoff; + event->mmap.pid = machine->pid; + + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); + + if (perf_tool__process_synth_event(tool, event, machine, + process) != 0) { + rc = -1; + break; + } + } + + free(event); + return rc; +} + +#endif + +void arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, u64 type) +{ + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + } +} + +void 
arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type) +{ + *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + } +} diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c new file mode 100644 index 000000000..cb59ce9b9 --- /dev/null +++ b/tools/perf/arch/x86/util/evlist.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include "util/pmu.h" +#include "util/evlist.h" +#include "util/parse-events.h" +#include "util/event.h" +#include "util/pmu-hybrid.h" +#include "topdown.h" + +static int ___evlist__add_default_attrs(struct evlist *evlist, + struct perf_event_attr *attrs, + size_t nr_attrs) +{ + struct perf_cpu_map *cpus; + struct evsel *evsel, *n; + struct perf_pmu *pmu; + LIST_HEAD(head); + size_t i = 0; + + for (i = 0; i < nr_attrs; i++) + event_attr_init(attrs + i); + + if (!perf_pmu__has_hybrid()) + return evlist__add_attrs(evlist, attrs, nr_attrs); + + for (i = 0; i < nr_attrs; i++) { + if (attrs[i].type == PERF_TYPE_SOFTWARE) { + evsel = evsel__new(attrs + i); + if (evsel == NULL) + goto out_delete_partial_list; + list_add_tail(&evsel->core.node, &head); + continue; + } + + perf_pmu__for_each_hybrid_pmu(pmu) { + evsel = evsel__new(attrs + i); + if (evsel == NULL) + goto out_delete_partial_list; + evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + cpus = perf_cpu_map__get(pmu->cpus); + evsel->core.cpus = cpus; + evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->pmu_name = strdup(pmu->name); + list_add_tail(&evsel->core.node, &head); + } + } + + evlist__splice_list_tail(evlist, &head); + + return 0; + +out_delete_partial_list: + __evlist__for_each_entry_safe(&head, n, evsel) + evsel__delete(evsel); + return -1; +} + +int arch_evlist__add_default_attrs(struct evlist *evlist, + struct perf_event_attr *attrs, + size_t nr_attrs) +{ + if (nr_attrs) + return ___evlist__add_default_attrs(evlist, attrs, nr_attrs); + + return topdown_parse_events(evlist); +} + +struct evsel *arch_evlist__leader(struct list_head *list) +{ + struct evsel *evsel, *first, *slots = NULL; + bool has_topdown = false; + + first = list_first_entry(list, struct evsel, core.node); + + if (!topdown_sys_has_perf_metrics()) + return first; + + /* If there is a slots event and a topdown event then the slots event comes first. 
*/ + __evlist__for_each_entry(list, evsel) { + if (evsel->pmu_name && !strncmp(evsel->pmu_name, "cpu", 3) && evsel->name) { + if (strcasestr(evsel->name, "slots")) { + slots = evsel; + if (slots == first) + return first; + } + if (strcasestr(evsel->name, "topdown")) + has_topdown = true; + if (slots && has_topdown) + return slots; + } + } + return first; +} diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c new file mode 100644 index 000000000..d72390cdf --- /dev/null +++ b/tools/perf/arch/x86/util/evsel.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include "util/evsel.h" +#include "util/env.h" +#include "util/pmu.h" +#include "linux/string.h" +#include "evsel.h" +#include "util/debug.h" +#include "env.h" + +#define IBS_FETCH_L3MISSONLY (1ULL << 59) +#define IBS_OP_L3MISSONLY (1ULL << 16) + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} + +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) +{ + struct perf_env env = { .total_mem = 0, } ; + + if (!perf_env__cpuid(&env)) + return; + + /* + * On AMD, precise cycles event sampling internally uses IBS pmu. + * But IBS does not have filtering capabilities and perf by default + * sets exclude_guest = 1. This makes IBS pmu event init fail and + * thus perf ends up doing non-precise sampling. Avoid it by clearing + * exclude_guest. + */ + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + attr->exclude_guest = 0; + + free(env.cpuid); +} + +/* Check whether the evsel's PMU supports the perf metrics */ +bool evsel__sys_has_perf_metrics(const struct evsel *evsel) +{ + const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu"; + + /* + * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU + * on a non-hybrid machine, "cpu_core" PMU on a hybrid machine. + * The slots event is only available for the core PMU, which + * supports the perf metrics feature. + * Checking both the PERF_TYPE_RAW type and the slots event + * should be good enough to detect the perf metrics feature. + */ + if ((evsel->core.attr.type == PERF_TYPE_RAW) && + pmu_have_event(pmu_name, "slots")) + return true; + + return false; +} + +bool arch_evsel__must_be_in_group(const struct evsel *evsel) +{ + if (!evsel__sys_has_perf_metrics(evsel)) + return false; + + return evsel->name && + (strcasestr(evsel->name, "slots") || + strcasestr(evsel->name, "topdown")); +} + +int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) +{ + u64 event = evsel->core.attr.config & PERF_HW_EVENT_MASK; + u64 pmu = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + const char *event_name; + + if (event < PERF_COUNT_HW_MAX && evsel__hw_names[event]) + event_name = evsel__hw_names[event]; + else + event_name = "unknown-hardware"; + + /* The PMU type is not required for the non-hybrid platform. */ + if (!pmu) + return scnprintf(bf, size, "%s", event_name); + + return scnprintf(bf, size, "%s/%s/", + evsel->pmu_name ? evsel->pmu_name : "cpu", + event_name); +} + +static void ibs_l3miss_warn(void) +{ + pr_warning( +"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n" +"and tagged operation does not cause L3 Miss. 
This causes sampling period skew.\n"); +} + +void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr) +{ + struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu; + static int warned_once; + + if (warned_once || !x86__is_amd_cpu()) + return; + + evsel_pmu = evsel__find_pmu(evsel); + if (!evsel_pmu) + return; + + ibs_fetch_pmu = perf_pmu__find("ibs_fetch"); + ibs_op_pmu = perf_pmu__find("ibs_op"); + + if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_FETCH_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_OP_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } +} diff --git a/tools/perf/arch/x86/util/evsel.h b/tools/perf/arch/x86/util/evsel.h new file mode 100644 index 000000000..19ad16913 --- /dev/null +++ b/tools/perf/arch/x86/util/evsel.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _EVSEL_H +#define _EVSEL_H 1 + +bool evsel__sys_has_perf_metrics(const struct evsel *evsel); + +#endif diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c new file mode 100644 index 000000000..a51444a77 --- /dev/null +++ b/tools/perf/arch/x86/util/header.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/types.h> +#include <errno.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <regex.h> + +#include "../../../util/debug.h" +#include "../../../util/header.h" +#include "cpuid.h" + +void get_cpuid_0(char *vendor, unsigned int *lvl) +{ + unsigned int b, c, d; + + cpuid(0, 0, lvl, &b, &c, &d); + strncpy(&vendor[0], (char *)(&b), 4); + strncpy(&vendor[4], (char *)(&d), 4); + strncpy(&vendor[8], (char *)(&c), 4); + vendor[12] = '\0'; +} + +static int +__get_cpuid(char *buffer, size_t sz, const char *fmt) +{ + unsigned int a, b, c, d, lvl; + int family = -1, model = -1, step = -1; + int nb; + char vendor[16]; + + get_cpuid_0(vendor, &lvl); + + if (lvl >= 1) { + cpuid(1, 0, &a, &b, &c, &d); + + family = (a >> 8) & 0xf; /* bits 11 - 8 */ + model = (a >> 4) & 0xf; /* Bits 7 - 4 */ + step = a & 0xf; + + /* extended family */ + if (family == 0xf) + family += (a >> 20) & 0xff; + + /* extended model */ + if (family >= 0x6) + model += ((a >> 16) & 0xf) << 4; + } + nb = scnprintf(buffer, sz, fmt, vendor, family, model, step); + + /* look for end marker to ensure the entire data fit */ + if (strchr(buffer, '$')) { + buffer[nb-1] = '\0'; + return 0; + } + return ENOBUFS; +} + +int +get_cpuid(char *buffer, size_t sz) +{ + return __get_cpuid(buffer, sz, "%s,%u,%u,%u$"); +} + +char * +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + char *buf = malloc(128); + + if (buf && __get_cpuid(buf, 128, "%s-%u-%X-%X$") < 0) { + free(buf); + return NULL; + } + return buf; +} + +/* Full CPUID format for x86 is vendor-family-model-stepping */ +static bool is_full_cpuid(const char *id) +{ + const char *tmp = id; + int count = 0; + + while ((tmp = strchr(tmp, '-')) != NULL) { + count++; + tmp++; + } + + if (count == 3) + return true; + + return false; +} + +int strcmp_cpuid_str(const char *mapcpuid, const char *id) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; + bool full_mapcpuid = is_full_cpuid(mapcpuid); + bool full_cpuid = is_full_cpuid(id); + + /* + * Full CPUID format is required to identify a platform. + * Error out if the cpuid string is incomplete. + */ + if (full_mapcpuid && !full_cpuid) { + pr_info("Invalid CPUID %s. 
Full CPUID is required, " + "vendor-family-model-stepping\n", id); + return 1; + } + + if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", mapcpuid); + return 1; + } + + match = !regexec(&re, id, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + size_t cpuid_len; + + /* If the full CPUID format isn't required, + * ignoring the stepping. + */ + if (!full_mapcpuid && full_cpuid) + cpuid_len = strrchr(id, '-') - id; + else + cpuid_len = strlen(id); + + /* Verify the entire string matched. */ + if (match_len == cpuid_len) + return 0; + } + + return 1; +} diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c new file mode 100644 index 000000000..439c2956f --- /dev/null +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * intel-bts.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. + */ + +#include <errno.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <linux/zalloc.h> + +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/record.h" +#include "../../../util/tsc.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-bts.h" +#include <internal/lib.h> // page_size + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +struct intel_bts_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_bts_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_bts_pmu; + struct evlist *evlist; + bool snapshot_mode; + size_t snapshot_size; + int snapshot_ref_cnt; + struct intel_bts_snapshot_ref *snapshot_refs; +}; + +struct branch { + u64 from; + u64 to; + u64 misc; +}; + +static size_t +intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct evlist *evlist __maybe_unused) +{ + return INTEL_BTS_AUXTRACE_PRIV_SIZE; +} + +static int intel_bts_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct perf_record_auxtrace_info *auxtrace_info, + size_t priv_size) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false; + int err; + + if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->core.nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].core.base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel BTS: TSC not available\n"); + } + + auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS; + auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type; + auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero; + 
auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode; + + return 0; +} + +static int intel_bts_recording_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; + struct evsel *evsel, *intel_bts_evsel = NULL; + const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; + bool privileged = perf_event_paranoid_check(-1); + + if (opts->auxtrace_sample_mode) { + pr_err("Intel BTS does not support AUX area sampling\n"); + return -EINVAL; + } + + btsr->evlist = evlist; + btsr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == intel_bts_pmu->type) { + if (intel_bts_evsel) { + pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->needs_auxtrace_mmap = true; + intel_bts_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n"); + return -EINVAL; + } + + if (!opts->full_auxtrace) + return 0; + + if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) { + pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); + return -EINVAL; + } + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel BTS snapshot size: %zu\n", + opts->auxtrace_snapshot_size); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + 
pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + if (intel_bts_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + evlist__to_front(evlist, intel_bts_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!perf_cpu_map__empty(cpus)) + evsel__set_sample_bit(intel_bts_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct evsel *tracking_evsel; + int err; + + err = parse_event(evlist, "dummy:u"); + if (err) + return err; + + tracking_evsel = evlist__last(evlist); + + evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->core.attr.freq = 0; + tracking_evsel->core.attr.sample_period = 1; + } + + return 0; +} + +static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + btsr->snapshot_size = snapshot_size; + + return 0; +} + +static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr, + int idx) +{ + const size_t sz = sizeof(struct intel_bts_snapshot_ref); + int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_bts_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, btsr->snapshot_refs, cnt * sz); + + btsr->snapshot_refs = refs; + btsr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr) +{ + int i; + + for (i = 0; i < btsr->snapshot_ref_cnt; i++) + zfree(&btsr->snapshot_refs[i].ref_buf); + zfree(&btsr->snapshot_refs); +} + +static void intel_bts_recording_free(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + + intel_bts_free_snapshot_refs(btsr); + free(btsr); +} + +static int intel_bts_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(btsr->evlist, evsel) { + if (evsel->core.attr.type == btsr->intel_bts_pmu->type) + return evsel__disable(evsel); + } + return -EINVAL; +} + +static int intel_bts_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(btsr->evlist, evsel) { + if (evsel->core.attr.type == btsr->intel_bts_pmu->type) + return evsel__enable(evsel); + } + return -EINVAL; +} + +static bool intel_bts_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct 
intel_bts_recording *btsr = + container_of(itr, struct intel_bts_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + if (idx >= btsr->snapshot_ref_cnt) { + err = intel_bts_alloc_snapshot_refs(btsr, idx); + if (err) + goto out_err; + } + + wrapped = btsr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) { + btsr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +struct auxtrace_record *intel_bts_recording_init(int *err) +{ + struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); + struct intel_bts_recording *btsr; + + if (!intel_bts_pmu) + return NULL; + + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + + btsr = zalloc(sizeof(struct intel_bts_recording)); + if (!btsr) { + *err = -ENOMEM; + return NULL; + } + + btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.pmu = intel_bts_pmu; + btsr->itr.recording_options = intel_bts_recording_options; + btsr->itr.info_priv_size = intel_bts_info_priv_size; + btsr->itr.info_fill = intel_bts_info_fill; + btsr->itr.free = intel_bts_recording_free; + btsr->itr.snapshot_start = intel_bts_snapshot_start; + btsr->itr.snapshot_finish = intel_bts_snapshot_finish; + btsr->itr.find_snapshot = intel_bts_find_snapshot; + btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; + btsr->itr.reference = intel_bts_reference; + btsr->itr.read_finish = auxtrace_record__read_finish; + btsr->itr.alignment = sizeof(struct branch); + return &btsr->itr; +} diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c new file mode 100644 index 000000000..af102f471 --- /dev/null +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -0,0 +1,1225 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * intel_pt.c: Intel Processor Trace support + * Copyright (c) 2013-2015, Intel Corporation. 
+ */ + +#include <errno.h> +#include <stdbool.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <linux/zalloc.h> +#include <linux/err.h> +#include <cpuid.h> + +#include "../../../util/session.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" +#include "../../../util/cpumap.h" +#include "../../../util/mmap.h" +#include <subcmd/parse-options.h> +#include "../../../util/parse-events.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/perf_api_probe.h" +#include "../../../util/record.h" +#include "../../../util/target.h" +#include "../../../util/tsc.h" +#include <internal/lib.h> // page_size +#include "../../../util/intel-pt.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define KiB_MASK(x) (KiB(x) - 1) +#define MiB_MASK(x) (MiB(x) - 1) + +#define INTEL_PT_PSB_PERIOD_NEAR 256 + +struct intel_pt_snapshot_ref { + void *ref_buf; + size_t ref_offset; + bool wrapped; +}; + +struct intel_pt_recording { + struct auxtrace_record itr; + struct perf_pmu *intel_pt_pmu; + int have_sched_switch; + struct evlist *evlist; + bool snapshot_mode; + bool snapshot_init_done; + size_t snapshot_size; + size_t snapshot_ref_buf_size; + int snapshot_ref_cnt; + struct intel_pt_snapshot_ref *snapshot_refs; + size_t priv_size; +}; + +static int intel_pt_parse_terms_with_default(const char *pmu_name, + struct list_head *formats, + const char *str, + u64 *config) +{ + struct list_head *terms; + struct perf_event_attr attr = { .size = 0, }; + int err; + + terms = malloc(sizeof(struct list_head)); + if (!terms) + return -ENOMEM; + + INIT_LIST_HEAD(terms); + + err = parse_events_terms(terms, str); + if (err) + goto out_free; + + attr.config = *config; + err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true, + NULL); + if (err) + goto out_free; + + *config = attr.config; +out_free: + parse_events_terms__delete(terms); + return err; +} + +static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats, + const char *str, u64 *config) +{ + *config = 0; + return intel_pt_parse_terms_with_default(pmu_name, formats, str, + config); +} + +static u64 intel_pt_masked_bits(u64 mask, u64 bits) +{ + const u64 top_bit = 1ULL << 63; + u64 res = 0; + int i; + + for (i = 0; i < 64; i++) { + if (mask & top_bit) { + res <<= 1; + if (bits & top_bit) + res |= 1; + } + mask <<= 1; + bits <<= 1; + } + + return res; +} + +static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, + struct evlist *evlist, u64 *res) +{ + struct evsel *evsel; + u64 mask; + + *res = 0; + + mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); + if (!mask) + return -EINVAL; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == intel_pt_pmu->type) { + *res = intel_pt_masked_bits(mask, evsel->core.attr.config); + return 0; + } + } + + return -EINVAL; +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, + struct evlist *evlist) +{ + u64 val; + int err, topa_multiple_entries; + size_t psb_period; + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", + "%d", &topa_multiple_entries) != 1) + topa_multiple_entries = 0; + + /* + * Use caps/topa_multiple_entries to indicate early hardware that had + * extra frequent PSBs. 
+ */ + if (!topa_multiple_entries) { + psb_period = 256; + goto out; + } + + err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); + if (err) + val = 0; + + psb_period = 1 << (val + 11); +out: + pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); + return psb_period; +} + +static int intel_pt_pick_bit(int bits, int target) +{ + int pos, pick = -1; + + for (pos = 0; bits; bits >>= 1, pos++) { + if (bits & 1) { + if (pos <= target || pick < 0) + pick = pos; + if (pos >= target) + break; + } + } + + return pick; +} + +static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) +{ + char buf[256]; + int mtc, mtc_periods = 0, mtc_period; + int psb_cyc, psb_periods, psb_period; + int pos = 0; + u64 config; + char c; + + pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", + &mtc) != 1) + mtc = 1; + + if (mtc) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", + &mtc_periods) != 1) + mtc_periods = 0; + if (mtc_periods) { + mtc_period = intel_pt_pick_bit(mtc_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",mtc,mtc_period=%d", mtc_period); + } + } + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", + &psb_cyc) != 1) + psb_cyc = 1; + + if (psb_cyc && mtc_periods) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", + &psb_periods) != 1) + psb_periods = 0; + if (psb_periods) { + psb_period = intel_pt_pick_bit(psb_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",psb_period=%d", psb_period); + } + } + + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1) + pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch"); + + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); + + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf, + &config); + + return config; +} + +static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + ptr->snapshot_size = snapshot_size; + + return 0; +} + +struct perf_event_attr * +intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) + return NULL; + + attr->config = intel_pt_default_config(intel_pt_pmu); + + intel_pt_pmu->selectable = true; + + return attr; +} + +static const char *intel_pt_find_filter(struct evlist *evlist, + struct perf_pmu *intel_pt_pmu) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == intel_pt_pmu->type) + return evsel->filter; + } + + return NULL; +} + +static size_t intel_pt_filter_bytes(const char *filter) +{ + size_t len = filter ? strlen(filter) : 0; + + return len ? 
roundup(len + 1, 8) : 0; +} + +static size_t +intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu); + + ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + + intel_pt_filter_bytes(filter); + ptr->priv_size += sizeof(u64); /* Cap Event Trace */ + + return ptr->priv_size; +} + +static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + *n = ebx; + *d = eax; +} + +static int intel_pt_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct perf_record_auxtrace_info *auxtrace_info, + size_t priv_size) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + struct perf_event_mmap_page *pc; + struct perf_tsc_conversion tc = { .time_mult = 0, }; + bool cap_user_time_zero = false, per_cpu_mmaps; + u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; + u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; + unsigned long max_non_turbo_ratio; + size_t filter_str_len; + const char *filter; + int event_trace; + __u64 *info; + int err; + + if (priv_size != ptr->priv_size) + return -EINVAL; + + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "tsc", &tsc_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "noretcomp", &noretcomp_bit); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "mtc", &mtc_bit); + mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, + "mtc_period"); + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "cyc", &cyc_bit); + + intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); + + if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio", + "%lu", &max_non_turbo_ratio) != 1) + max_non_turbo_ratio = 0; + if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace", + "%d", &event_trace) != 1) + event_trace = 0; + + filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); + filter_str_len = filter ? 
strlen(filter) : 0; + + if (!session->evlist->core.nr_mmaps) + return -EINVAL; + + pc = session->evlist->mmap[0].core.base; + if (pc) { + err = perf_read_tsc_conversion(pc, &tc); + if (err) { + if (err != -EOPNOTSUPP) + return err; + } else { + cap_user_time_zero = tc.time_mult != 0; + } + if (!cap_user_time_zero) + ui__warning("Intel Processor Trace: TSC not available\n"); + } + + per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus); + + auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; + auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; + auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; + auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; + auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; + auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; + auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; + auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; + auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; + auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; + auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; + auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; + auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; + auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; + auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; + auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio; + auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len; + + info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; + + if (filter_str_len) { + size_t len = intel_pt_filter_bytes(filter); + + strncpy((char *)info, filter, len); + info += len >> 3; + } + + *info++ = event_trace; + + return 0; +} + +static int intel_pt_track_switches(struct evlist *evlist) +{ + const char *sched_switch = "sched:sched_switch"; + struct evsel *evsel; + int err; + + if (!evlist__can_select_event(evlist, sched_switch)) + return -EPERM; + + evsel = evlist__add_sched_switch(evlist, true); + if (IS_ERR(evsel)) { + err = PTR_ERR(evsel); + pr_debug2("%s: failed to create %s, error = %d\n", + __func__, sched_switch, err); + return err; + } + + evsel->immediate = true; + + return 0; +} + +static void intel_pt_valid_str(char *str, size_t len, u64 valid) +{ + unsigned int val, last = 0, state = 1; + int p = 0; + + str[0] = '\0'; + + for (val = 0; val <= 64; val++, valid >>= 1) { + if (valid & 1) { + last = val; + switch (state) { + case 0: + p += scnprintf(str + p, len - p, ","); + /* Fall through */ + case 1: + p += scnprintf(str + p, len - p, "%u", val); + state = 2; + break; + case 2: + state = 3; + break; + case 3: + state = 4; + break; + default: + break; + } + } else { + switch (state) { + case 3: + p += scnprintf(str + p, len - p, ",%u", last); + state = 0; + break; + case 4: + p += scnprintf(str + p, len - p, "-%u", last); + state = 0; + break; + default: + break; + } + if (state != 1) + state = 0; + } + } +} + +static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, + const char *caps, const char *name, + const char *supported, u64 config) +{ + char valid_str[256]; + unsigned int shift; + unsigned long long valid; + u64 bits; + int ok; + + if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) + valid = 0; + + if (supported && + perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) + valid = 0; + + valid |= 1; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); + + config 
&= bits; + + for (shift = 0; bits && !(bits & 1); shift++) + bits >>= 1; + + config >>= shift; + + if (config > 63) + goto out_err; + + if (valid & (1 << config)) + return 0; +out_err: + intel_pt_valid_str(valid_str, sizeof(valid_str), valid); + pr_err("Invalid %s for %s. Valid values are: %s\n", + name, INTEL_PT_PMU_NAME, valid_str); + return -EINVAL; +} + +static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, + struct evsel *evsel) +{ + int err; + char c; + + if (!evsel) + return 0; + + /* + * If supported, force pass-through config term (pt=1) even if user + * sets pt=0, which avoids senseless kernel errors. + */ + if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && + !(evsel->core.attr.config & 1)) { + pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); + evsel->core.attr.config |= 1; + } + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", + "cyc_thresh", "caps/psb_cyc", + evsel->core.attr.config); + if (err) + return err; + + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", + "mtc_period", "caps/mtc", + evsel->core.attr.config); + if (err) + return err; + + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", + "psb_period", "caps/psb_cyc", + evsel->core.attr.config); +} + +static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu, + struct evsel *evsel) +{ + u64 user_bits = 0, bits; + struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG); + + if (term) + user_bits = term->val.cfg_chg; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period"); + + /* Did user change psb_period */ + if (bits & user_bits) + return; + + /* Set psb_period to 0 */ + evsel->core.attr.config &= ~bits; +} + +static void intel_pt_min_max_sample_sz(struct evlist *evlist, + size_t *min_sz, size_t *max_sz) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + size_t sz = evsel->core.attr.aux_sample_size; + + if (!sz) + continue; + if (min_sz && (sz < *min_sz || !*min_sz)) + *min_sz = sz; + if (max_sz && sz > *max_sz) + *max_sz = sz; + } +} + +/* + * Currently, there is not enough information to disambiguate different PEBS + * events, so only allow one. 
+ */ +static bool intel_pt_too_many_aux_output(struct evlist *evlist) +{ + struct evsel *evsel; + int aux_output_cnt = 0; + + evlist__for_each_entry(evlist, evsel) + aux_output_cnt += !!evsel->core.attr.aux_output; + + if (aux_output_cnt > 1) { + pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n"); + return true; + } + + return false; +} + +static int intel_pt_recording_options(struct auxtrace_record *itr, + struct evlist *evlist, + struct record_opts *opts) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; + bool have_timing_info, need_immediate = false; + struct evsel *evsel, *intel_pt_evsel = NULL; + const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; + bool privileged = perf_event_paranoid_check(-1); + u64 tsc_bit; + int err; + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type == intel_pt_pmu->type) { + if (intel_pt_evsel) { + pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); + return -EINVAL; + } + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->no_aux_samples = true; + evsel->needs_auxtrace_mmap = true; + intel_pt_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { + pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); + return -EINVAL; + } + + if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) { + pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n"); + return -EINVAL; + } + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); + return -EINVAL; + } + + if (intel_pt_too_many_aux_output(evlist)) + return -EINVAL; + + if (!opts->full_auxtrace) + return 0; + + if (opts->auxtrace_sample_mode) + intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel); + + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); + if (err) + return err; + + /* Set default sizes for snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + pr_debug2("Intel PT snapshot size: %zu\n", + 
opts->auxtrace_snapshot_size); + if (psb_period && + opts->auxtrace_snapshot_size <= psb_period + + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", + opts->auxtrace_snapshot_size, psb_period); + } + + /* Set default sizes for sample mode */ + if (opts->auxtrace_sample_mode) { + size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); + size_t min_sz = 0, max_sz = 0; + + intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz); + if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + if (!opts->auxtrace_mmap_pages) { + size_t sz = round_up(max_sz, page_size) / page_size; + + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n", + max_sz, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n", + min_sz, max_sz); + if (psb_period && + min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR) + ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n", + min_sz, psb_period); + } + + /* Set default sizes for full trace mode */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz; + + if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode) + min_sz = KiB(4); + else + min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) { + u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4; + + intel_pt_evsel->core.attr.aux_watermark = aux_watermark; + } + + intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, + "tsc", &tsc_bit); + + if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit)) + have_timing_info = true; + else + have_timing_info = false; + + /* + * Per-cpu recording needs sched_switch events to distinguish different + * threads. 
+ */ + if (have_timing_info && !perf_cpu_map__empty(cpus) && + !record_opts__no_switch_events(opts)) { + if (perf_can_record_switch_events()) { + bool cpu_wide = !target__none(&opts->target) && + !target__has_task(&opts->target); + + if (!cpu_wide && perf_can_record_cpu_wide()) { + struct evsel *switch_evsel; + + switch_evsel = evlist__add_dummy_on_all_cpus(evlist); + if (!switch_evsel) + return -ENOMEM; + + switch_evsel->core.attr.context_switch = 1; + switch_evsel->immediate = true; + + evsel__set_sample_bit(switch_evsel, TID); + evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); + + opts->record_switch_events = false; + ptr->have_sched_switch = 3; + } else { + opts->record_switch_events = true; + need_immediate = true; + if (cpu_wide) + ptr->have_sched_switch = 3; + else + ptr->have_sched_switch = 2; + } + } else { + err = intel_pt_track_switches(evlist); + if (err == -EPERM) + pr_debug2("Unable to select sched:sched_switch\n"); + else if (err) + return err; + else + ptr->have_sched_switch = 1; + } + } + + if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel && + perf_can_record_text_poke_events() && perf_can_record_cpu_wide()) + opts->text_poke = true; + + if (intel_pt_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first. + */ + evlist__to_front(evlist, intel_pt_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!perf_cpu_map__empty(cpus)) + evsel__set_sample_bit(intel_pt_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + bool need_system_wide_tracking; + struct evsel *tracking_evsel; + + /* + * User space tasks can migrate between CPUs, so when tracing + * selected CPUs, sideband for all CPUs is still needed. + */ + need_system_wide_tracking = opts->target.cpu_list && + !intel_pt_evsel->core.attr.exclude_user; + + tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking); + if (!tracking_evsel) + return -ENOMEM; + + evlist__set_tracking_event(evlist, tracking_evsel); + + if (need_immediate) + tracking_evsel->immediate = true; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!perf_cpu_map__empty(cpus)) { + evsel__set_sample_bit(tracking_evsel, TIME); + /* And the CPU for switch events */ + evsel__set_sample_bit(tracking_evsel, CPU); + } + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + } + + /* + * Warn the user when we do not have enough information to decode i.e. + * per-cpu with no sched_switch (except workload-only). 
+ */ + if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && + !target__none(&opts->target) && + !intel_pt_evsel->core.attr.exclude_user) + ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); + + return 0; +} + +static int intel_pt_snapshot_start(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->intel_pt_pmu->type) + return evsel__disable(evsel); + } + return -EINVAL; +} + +static int intel_pt_snapshot_finish(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->intel_pt_pmu->type) + return evsel__enable(evsel); + } + return -EINVAL; +} + +static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) +{ + const size_t sz = sizeof(struct intel_pt_snapshot_ref); + int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; + struct intel_pt_snapshot_ref *refs; + + if (!new_cnt) + new_cnt = 16; + + while (new_cnt <= idx) + new_cnt *= 2; + + refs = calloc(new_cnt, sz); + if (!refs) + return -ENOMEM; + + memcpy(refs, ptr->snapshot_refs, cnt * sz); + + ptr->snapshot_refs = refs; + ptr->snapshot_ref_cnt = new_cnt; + + return 0; +} + +static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) +{ + int i; + + for (i = 0; i < ptr->snapshot_ref_cnt; i++) + zfree(&ptr->snapshot_refs[i].ref_buf); + zfree(&ptr->snapshot_refs); +} + +static void intel_pt_recording_free(struct auxtrace_record *itr) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + + intel_pt_free_snapshot_refs(ptr); + free(ptr); +} + +static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, + size_t snapshot_buf_size) +{ + size_t ref_buf_size = ptr->snapshot_ref_buf_size; + void *ref_buf; + + ref_buf = zalloc(ref_buf_size); + if (!ref_buf) + return -ENOMEM; + + ptr->snapshot_refs[idx].ref_buf = ref_buf; + ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; + + return 0; +} + +static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + const size_t max_size = 256 * 1024; + size_t buf_size = 0, psb_period; + + if (ptr->snapshot_size <= 64 * 1024) + return 0; + + psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); + if (psb_period) + buf_size = psb_period * 2; + + if (!buf_size || buf_size > max_size) + buf_size = max_size; + + if (buf_size >= snapshot_buf_size) + return 0; + + if (buf_size >= ptr->snapshot_size / 2) + return 0; + + return buf_size; +} + +static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, + size_t snapshot_buf_size) +{ + if (ptr->snapshot_init_done) + return 0; + + ptr->snapshot_init_done = true; + + ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, + snapshot_buf_size); + + return 0; +} + +/** + * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. + * @buf1: first buffer + * @compare_size: number of bytes to compare + * @buf2: second buffer (a circular buffer) + * @offs2: offset in second buffer + * @buf2_size: size of second buffer + * + * The comparison allows for the possibility that the bytes to compare in the + * circular buffer are not contiguous. It is assumed that @compare_size <= + * @buf2_size. 
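The snapshot bookkeeping below keeps a small reference buffer per mmap and decides whether the AUX area wrapped by comparing it against the circular trace buffer, so the comparison has to cope with the compared range wrapping past the end of the buffer. A self-contained sketch of that wrap-around comparison idea, with a tiny test (illustrative only, not the function that follows):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Compare 'n' bytes at 'lin' against a circular buffer 'circ' of size
 * 'circ_size' starting at offset 'offs'; the range may wrap around.
 * Returns true if the bytes differ. */
static bool circ_differs(const void *lin, size_t n,
			 const void *circ, size_t offs, size_t circ_size)
{
	if (offs + n <= circ_size)
		return memcmp(lin, (const char *)circ + offs, n);

	/* Wrapped: compare the tail of the buffer first, then the start. */
	size_t tail = circ_size - offs;

	if (memcmp(lin, (const char *)circ + offs, tail))
		return true;
	return memcmp((const char *)lin + tail, circ, n - tail);
}

int main(void)
{
	char circ[8] = { 'E', 'F', 'G', 'H', 'A', 'B', 'C', 'D' };
	char lin[6]  = { 'A', 'B', 'C', 'D', 'E', 'F' };

	/* "ABCDEF" starts at offset 4 and wraps to the front: identical. */
	printf("differs: %d\n", circ_differs(lin, sizeof(lin), circ, 4, sizeof(circ)));
	return 0;
}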
This function returns %false if the bytes are identical, %true + * otherwise. + */ +static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, + void *buf2, size_t offs2, size_t buf2_size) +{ + size_t end2 = offs2 + compare_size, part_size; + + if (end2 <= buf2_size) + return memcmp(buf1, buf2 + offs2, compare_size); + + part_size = end2 - buf2_size; + if (memcmp(buf1, buf2 + offs2, part_size)) + return true; + + compare_size -= part_size; + + return memcmp(buf1 + part_size, buf2, compare_size); +} + +static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, + size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + size_t ref_end = ref_offset + ref_size; + + if (ref_end > buf_size) { + if (head > ref_offset || head < ref_end - buf_size) + return true; + } else if (head > ref_offset && head < ref_end) { + return true; + } + + return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, + buf_size); +} + +static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, + void *data, size_t head) +{ + if (head >= ref_size) { + memcpy(ref_buf, data + head - ref_size, ref_size); + } else { + memcpy(ref_buf, data, head); + ref_size -= head; + memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); + } +} + +static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 head) +{ + struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; + bool wrapped; + + wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, + ptr->snapshot_ref_buf_size, mm->len, + data, head); + + intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, + data, head); + + return wrapped; +} + +static bool intel_pt_first_wrap(u64 *data, size_t buf_size) +{ + int i, a, b; + + b = buf_size >> 3; + a = b - 512; + if (a < 0) + a = 0; + + for (i = a; i < b; i++) { + if (data[i]) + return true; + } + + return false; +} + +static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + struct intel_pt_recording *ptr = + container_of(itr, struct intel_pt_recording, itr); + bool wrapped; + int err; + + pr_debug3("%s: mmap index %d old head %zu new head %zu\n", + __func__, idx, (size_t)*old, (size_t)*head); + + err = intel_pt_snapshot_init(ptr, mm->len); + if (err) + goto out_err; + + if (idx >= ptr->snapshot_ref_cnt) { + err = intel_pt_alloc_snapshot_refs(ptr, idx); + if (err) + goto out_err; + } + + if (ptr->snapshot_ref_buf_size) { + if (!ptr->snapshot_refs[idx].ref_buf) { + err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); + if (err) + goto out_err; + } + wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); + } else { + wrapped = ptr->snapshot_refs[idx].wrapped; + if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { + ptr->snapshot_refs[idx].wrapped = true; + wrapped = true; + } + } + + /* + * In full trace mode 'head' continually increases. However in snapshot + * mode 'head' is an offset within the buffer. Here 'old' and 'head' + * are adjusted to match the full trace case which expects that 'old' is + * always less than 'head'. + */ + if (wrapped) { + *old = *head; + *head += mm->len; + } else { + if (mm->mask) + *old &= mm->mask; + else + *old %= mm->len; + if (*old > *head) + *head += mm->len; + } + + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", + __func__, wrapped ? 
"" : "not ", (size_t)*old, (size_t)*head); + + return 0; + +out_err: + pr_err("%s: failed, error %d\n", __func__, err); + return err; +} + +static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) +{ + return rdtsc(); +} + +struct auxtrace_record *intel_pt_recording_init(int *err) +{ + struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); + struct intel_pt_recording *ptr; + + if (!intel_pt_pmu) + return NULL; + + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + + ptr = zalloc(sizeof(struct intel_pt_recording)); + if (!ptr) { + *err = -ENOMEM; + return NULL; + } + + ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.pmu = intel_pt_pmu; + ptr->itr.recording_options = intel_pt_recording_options; + ptr->itr.info_priv_size = intel_pt_info_priv_size; + ptr->itr.info_fill = intel_pt_info_fill; + ptr->itr.free = intel_pt_recording_free; + ptr->itr.snapshot_start = intel_pt_snapshot_start; + ptr->itr.snapshot_finish = intel_pt_snapshot_finish; + ptr->itr.find_snapshot = intel_pt_find_snapshot; + ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; + ptr->itr.reference = intel_pt_reference; + ptr->itr.read_finish = auxtrace_record__read_finish; + /* + * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K + * should give at least 1 PSB per sample. + */ + ptr->itr.default_aux_sample_size = 4096; + return &ptr->itr; +} diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c new file mode 100644 index 000000000..404de795e --- /dev/null +++ b/tools/perf/arch/x86/util/iostat.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * perf iostat + * + * Copyright (C) 2020, Intel Corporation + * + * Authors: Alexander Antonov <alexander.antonov@linux.intel.com> + */ + +#include <api/fs/fs.h> +#include <linux/kernel.h> +#include <linux/err.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <regex.h> +#include "util/cpumap.h" +#include "util/debug.h" +#include "util/iostat.h" +#include "util/counts.h" +#include "path.h" + +#ifndef MAX_PATH +#define MAX_PATH 1024 +#endif + +#define UNCORE_IIO_PMU_PATH "devices/uncore_iio_%d" +#define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH +#define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d" + +/* + * Each metric requiries one IIO event which increments at every 4B transfer + * in corresponding direction. 
The formulas to compute metrics are generic: + * #EventCount * 4B / (1024 * 1024) + */ +static const char * const iostat_metrics[] = { + "Inbound Read(MB)", + "Inbound Write(MB)", + "Outbound Read(MB)", + "Outbound Write(MB)", +}; + +static inline int iostat_metrics_count(void) +{ + return sizeof(iostat_metrics) / sizeof(char *); +} + +static const char *iostat_metric_by_idx(int idx) +{ + return *(iostat_metrics + idx % iostat_metrics_count()); +} + +struct iio_root_port { + u32 domain; + u8 bus; + u8 die; + u8 pmu_idx; + int idx; +}; + +struct iio_root_ports_list { + struct iio_root_port **rps; + int nr_entries; +}; + +static struct iio_root_ports_list *root_ports; + +static void iio_root_port_show(FILE *output, + const struct iio_root_port * const rp) +{ + if (output && rp) + fprintf(output, "S%d-uncore_iio_%d<%04x:%02x>\n", + rp->die, rp->pmu_idx, rp->domain, rp->bus); +} + +static struct iio_root_port *iio_root_port_new(u32 domain, u8 bus, + u8 die, u8 pmu_idx) +{ + struct iio_root_port *p = calloc(1, sizeof(*p)); + + if (p) { + p->domain = domain; + p->bus = bus; + p->die = die; + p->pmu_idx = pmu_idx; + } + return p; +} + +static void iio_root_ports_list_free(struct iio_root_ports_list *list) +{ + int idx; + + if (list) { + for (idx = 0; idx < list->nr_entries; idx++) + free(list->rps[idx]); + free(list->rps); + free(list); + } +} + +static struct iio_root_port *iio_root_port_find_by_notation( + const struct iio_root_ports_list * const list, u32 domain, u8 bus) +{ + int idx; + struct iio_root_port *rp; + + if (list) { + for (idx = 0; idx < list->nr_entries; idx++) { + rp = list->rps[idx]; + if (rp && rp->domain == domain && rp->bus == bus) + return rp; + } + } + return NULL; +} + +static int iio_root_ports_list_insert(struct iio_root_ports_list *list, + struct iio_root_port * const rp) +{ + struct iio_root_port **tmp_buf; + + if (list && rp) { + rp->idx = list->nr_entries++; + tmp_buf = realloc(list->rps, + list->nr_entries * sizeof(*list->rps)); + if (!tmp_buf) { + pr_err("Failed to realloc memory\n"); + return -ENOMEM; + } + tmp_buf[rp->idx] = rp; + list->rps = tmp_buf; + } + return 0; +} + +static int iio_mapping(u8 pmu_idx, struct iio_root_ports_list * const list) +{ + char *buf; + char path[MAX_PATH]; + u32 domain; + u8 bus; + struct iio_root_port *rp; + size_t size; + int ret; + + for (int die = 0; die < cpu__max_node(); die++) { + scnprintf(path, MAX_PATH, PLATFORM_MAPPING_PATH, pmu_idx, die); + if (sysfs__read_str(path, &buf, &size) < 0) { + if (pmu_idx) + goto out; + pr_err("Mode iostat is not supported\n"); + return -1; + } + ret = sscanf(buf, "%04x:%02hhx", &domain, &bus); + free(buf); + if (ret != 2) { + pr_err("Invalid mapping data: iio_%d; die%d\n", + pmu_idx, die); + return -1; + } + rp = iio_root_port_new(domain, bus, die, pmu_idx); + if (!rp || iio_root_ports_list_insert(list, rp)) { + free(rp); + return -ENOMEM; + } + } +out: + return 0; +} + +static u8 iio_pmu_count(void) +{ + u8 pmu_idx = 0; + char path[MAX_PATH]; + const char *sysfs = sysfs__mountpoint(); + + if (sysfs) { + for (;; pmu_idx++) { + snprintf(path, sizeof(path), SYSFS_UNCORE_PMU_PATH, + sysfs, pmu_idx); + if (access(path, F_OK) != 0) + break; + } + } + return pmu_idx; +} + +static int iio_root_ports_scan(struct iio_root_ports_list **list) +{ + int ret = -ENOMEM; + struct iio_root_ports_list *tmp_list; + u8 pmu_count = iio_pmu_count(); + + if (!pmu_count) { + pr_err("Unsupported uncore pmu configuration\n"); + return -1; + } + + tmp_list = calloc(1, sizeof(*tmp_list)); + if (!tmp_list) + goto err; + + 
for (u8 pmu_idx = 0; pmu_idx < pmu_count; pmu_idx++) { + ret = iio_mapping(pmu_idx, tmp_list); + if (ret) + break; + } +err: + if (!ret) + *list = tmp_list; + else + iio_root_ports_list_free(tmp_list); + + return ret; +} + +static int iio_root_port_parse_str(u32 *domain, u8 *bus, char *str) +{ + int ret; + regex_t regex; + /* + * Expected format domain:bus: + * Valid domain range [0:ffff] + * Valid bus range [0:ff] + * Example: 0000:af, 0:3d, 01:7 + */ + regcomp(®ex, "^([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})", REG_EXTENDED); + ret = regexec(®ex, str, 0, NULL, 0); + if (ret || sscanf(str, "%08x:%02hhx", domain, bus) != 2) + pr_warning("Unrecognized root port format: %s\n" + "Please use the following format:\n" + "\t [domain]:[bus]\n" + "\t for example: 0000:3d\n", str); + + regfree(®ex); + return ret; +} + +static int iio_root_ports_list_filter(struct iio_root_ports_list **list, + const char *filter) +{ + char *tok, *tmp, *filter_copy = NULL; + struct iio_root_port *rp; + u32 domain; + u8 bus; + int ret = -ENOMEM; + struct iio_root_ports_list *tmp_list = calloc(1, sizeof(*tmp_list)); + + if (!tmp_list) + goto err; + + filter_copy = strdup(filter); + if (!filter_copy) + goto err; + + for (tok = strtok_r(filter_copy, ",", &tmp); tok; + tok = strtok_r(NULL, ",", &tmp)) { + if (!iio_root_port_parse_str(&domain, &bus, tok)) { + rp = iio_root_port_find_by_notation(*list, domain, bus); + if (rp) { + (*list)->rps[rp->idx] = NULL; + ret = iio_root_ports_list_insert(tmp_list, rp); + if (ret) { + free(rp); + goto err; + } + } else if (!iio_root_port_find_by_notation(tmp_list, + domain, bus)) + pr_warning("Root port %04x:%02x were not found\n", + domain, bus); + } + } + + if (tmp_list->nr_entries == 0) { + pr_err("Requested root ports were not found\n"); + ret = -EINVAL; + } +err: + iio_root_ports_list_free(*list); + if (ret) + iio_root_ports_list_free(tmp_list); + else + *list = tmp_list; + + free(filter_copy); + return ret; +} + +static int iostat_event_group(struct evlist *evl, + struct iio_root_ports_list *list) +{ + int ret; + int idx; + const char *iostat_cmd_template = + "{uncore_iio_%x/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\ + uncore_iio_%x/event=0x83,umask=0x01,ch_mask=0xF,fc_mask=0x07/,\ + uncore_iio_%x/event=0xc0,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\ + uncore_iio_%x/event=0xc0,umask=0x01,ch_mask=0xF,fc_mask=0x07/}"; + const int len_template = strlen(iostat_cmd_template) + 1; + struct evsel *evsel = NULL; + int metrics_count = iostat_metrics_count(); + char *iostat_cmd = calloc(len_template, 1); + + if (!iostat_cmd) + return -ENOMEM; + + for (idx = 0; idx < list->nr_entries; idx++) { + sprintf(iostat_cmd, iostat_cmd_template, + list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx, + list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx); + ret = parse_event(evl, iostat_cmd); + if (ret) + goto err; + } + + evlist__for_each_entry(evl, evsel) { + evsel->priv = list->rps[evsel->core.idx / metrics_count]; + } + list->nr_entries = 0; +err: + iio_root_ports_list_free(list); + free(iostat_cmd); + return ret; +} + +int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config) +{ + if (evlist->core.nr_entries > 0) { + pr_warning("The -e and -M options are not supported." 
+ "All chosen events/metrics will be dropped\n"); + evlist__delete(evlist); + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + } + + config->metric_only = true; + config->aggr_mode = AGGR_GLOBAL; + + return iostat_event_group(evlist, root_ports); +} + +int iostat_parse(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret; + struct perf_stat_config *config = (struct perf_stat_config *)opt->data; + + ret = iio_root_ports_scan(&root_ports); + if (!ret) { + config->iostat_run = true; + if (!str) + iostat_mode = IOSTAT_RUN; + else if (!strcmp(str, "list")) + iostat_mode = IOSTAT_LIST; + else { + iostat_mode = IOSTAT_RUN; + ret = iio_root_ports_list_filter(&root_ports, str); + } + } + return ret; +} + +void iostat_list(struct evlist *evlist, struct perf_stat_config *config) +{ + struct evsel *evsel; + struct iio_root_port *rp = NULL; + + evlist__for_each_entry(evlist, evsel) { + if (rp != evsel->priv) { + rp = evsel->priv; + iio_root_port_show(config->output, rp); + } + } +} + +void iostat_release(struct evlist *evlist) +{ + struct evsel *evsel; + struct iio_root_port *rp = NULL; + + evlist__for_each_entry(evlist, evsel) { + if (rp != evsel->priv) { + rp = evsel->priv; + free(evsel->priv); + } + } +} + +void iostat_prefix(struct evlist *evlist, + struct perf_stat_config *config, + char *prefix, struct timespec *ts) +{ + struct iio_root_port *rp = evlist->selected->priv; + + if (rp) { + if (ts) + sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s", + ts->tv_sec, ts->tv_nsec, + config->csv_sep, rp->domain, rp->bus, + config->csv_sep); + else + sprintf(prefix, "%04x:%02x%s", rp->domain, rp->bus, + config->csv_sep); + } +} + +void iostat_print_header_prefix(struct perf_stat_config *config) +{ + if (config->csv_output) + fputs("port,", config->output); + else if (config->interval) + fprintf(config->output, "# time port "); + else + fprintf(config->output, " port "); +} + +void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, + struct perf_stat_output_ctx *out) +{ + double iostat_value = 0; + u64 prev_count_val = 0; + const char *iostat_metric = iostat_metric_by_idx(evsel->core.idx); + u8 die = ((struct iio_root_port *)evsel->priv)->die; + struct perf_counts_values *count = perf_counts(evsel->counts, die, 0); + + if (count && count->run && count->ena) { + if (evsel->prev_raw_counts && !out->force_header) { + struct perf_counts_values *prev_count = + perf_counts(evsel->prev_raw_counts, die, 0); + + prev_count_val = prev_count->val; + prev_count->val = count->val; + } + iostat_value = (count->val - prev_count_val) / + ((double) count->run / count->ena); + } + out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric, + iostat_value / (256 * 1024)); +} + +void iostat_print_counters(struct evlist *evlist, + struct perf_stat_config *config, struct timespec *ts, + char *prefix, iostat_print_counter_t print_cnt_cb) +{ + void *perf_device = NULL; + struct evsel *counter = evlist__first(evlist); + + evlist__set_selected(evlist, counter); + iostat_prefix(evlist, config, prefix, ts); + fprintf(config->output, "%s", prefix); + evlist__for_each_entry(evlist, counter) { + perf_device = evlist->selected->priv; + if (perf_device && perf_device != counter->priv) { + evlist__set_selected(evlist, counter); + iostat_prefix(evlist, config, prefix, ts); + fprintf(config->output, "\n%s", prefix); + } + print_cnt_cb(config, counter, prefix); + } + fputc('\n', config->output); +} diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c new file mode 100644 index 000000000..c5dd54f6e --- /dev/null +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <string.h> +#include "../../../util/kvm-stat.h" +#include "../../../util/evsel.h" +#include <asm/svm.h> +#include <asm/vmx.h> +#include <asm/kvm.h> + +define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); +define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "exit_reason"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +/* + * For the mmio events, we treat: + * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry + * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...). + */ +static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample, + struct event_key *key) +{ + key->key = evsel__intval(evsel, sample, "gpa"); + key->info = evsel__intval(evsel, sample, "type"); +} + +#define KVM_TRACE_MMIO_READ_UNSATISFIED 0 +#define KVM_TRACE_MMIO_READ 1 +#define KVM_TRACE_MMIO_WRITE 2 + +static bool mmio_event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + /* MMIO read begin event in kernel. */ + if (kvm_exit_event(evsel)) + return true; + + /* MMIO write begin event in kernel. */ + if (!strcmp(evsel->name, "kvm:kvm_mmio") && + evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { + mmio_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample, + struct event_key *key) +{ + /* MMIO write end event in kernel. */ + if (kvm_entry_event(evsel)) + return true; + + /* MMIO read end event in kernel.*/ + if (!strcmp(evsel->name, "kvm:kvm_mmio") && + evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { + mmio_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + scnprintf(decode, decode_str_len, "%#lx:%s", + (unsigned long)key->key, + key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); +} + +static struct kvm_events_ops mmio_events = { + .is_begin_event = mmio_event_begin, + .is_end_event = mmio_event_end, + .decode_key = mmio_event_decode_key, + .name = "MMIO Access" +}; + + /* The time of emulation pio access is from kvm_pio to kvm_entry. 
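The ops structures above all work the same way: a "begin" tracepoint is paired with the next "end" tracepoint on the same vcpu, and the elapsed time is attributed to the decoded key (exit reason, gpa, port or msr). A minimal sketch of that pairing idea, illustrative only and not the perf implementation:

#include <stdint.h>
#include <stdio.h>

/* Per-vcpu state: the key and timestamp of the last 'begin' event. */
struct vcpu_state {
	uint64_t key;
	uint64_t begin_ns;
	int	 in_event;
};

/* Accumulated time per key would normally live in a hash table;
 * one slot is enough to show the pairing. */
static uint64_t total_ns;

static void on_begin(struct vcpu_state *v, uint64_t key, uint64_t ts)
{
	v->key = key;
	v->begin_ns = ts;
	v->in_event = 1;
}

static void on_end(struct vcpu_state *v, uint64_t ts)
{
	if (v->in_event) {
		total_ns += ts - v->begin_ns;
		v->in_event = 0;
	}
}

int main(void)
{
	struct vcpu_state v = { 0 };

	on_begin(&v, 0x3f8, 1000);	/* e.g. kvm_pio on port 0x3f8 */
	on_end(&v, 4500);		/* next kvm_entry */
	printf("time for key %#llx: %llu ns\n",
	       (unsigned long long)v.key, (unsigned long long)total_ns);
	return 0;
}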
*/ +static void ioport_event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->key = evsel__intval(evsel, sample, "port"); + key->info = evsel__intval(evsel, sample, "rw"); +} + +static bool ioport_event_begin(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, "kvm:kvm_pio")) { + ioport_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool ioport_event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + scnprintf(decode, decode_str_len, "%#llx:%s", + (unsigned long long)key->key, + key->info ? "POUT" : "PIN"); +} + +static struct kvm_events_ops ioport_events = { + .is_begin_event = ioport_event_begin, + .is_end_event = ioport_event_end, + .decode_key = ioport_event_decode_key, + .name = "IO Port Access" +}; + + /* The time of emulation msr is from kvm_msr to kvm_entry. */ +static void msr_event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->key = evsel__intval(evsel, sample, "ecx"); + key->info = evsel__intval(evsel, sample, "write"); +} + +static bool msr_event_begin(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, "kvm:kvm_msr")) { + msr_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool msr_event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +static void msr_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + scnprintf(decode, decode_str_len, "%#llx:%s", + (unsigned long long)key->key, + key->info ? 
"W" : "R"); +} + +static struct kvm_events_ops msr_events = { + .is_begin_event = msr_event_begin, + .is_end_event = msr_event_end, + .decode_key = msr_event_decode_key, + .name = "MSR Access" +}; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + "kvm:kvm_mmio", + "kvm:kvm_pio", + "kvm:kvm_msr", + NULL, +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events }, + { .name = "mmio", .ops = &mmio_events }, + { .name = "ioport", .ops = &ioport_events }, + { .name = "msr", .ops = &msr_events }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + "HLT", + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid) +{ + if (strstr(cpuid, "Intel")) { + kvm->exit_reasons = vmx_exit_reasons; + kvm->exit_reasons_isa = "VMX"; + } else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) { + kvm->exit_reasons = svm_exit_reasons; + kvm->exit_reasons_isa = "SVM"; + } else + return -ENOTSUP; + + return 0; +} diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c new file mode 100644 index 000000000..31679c35d --- /dev/null +++ b/tools/perf/arch/x86/util/machine.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> +#include <limits.h> +#include <stdlib.h> + +#include <internal/lib.h> // page_size +#include "../../../util/machine.h" +#include "../../../util/map.h" +#include "../../../util/symbol.h" +#include <linux/ctype.h> + +#include <symbol/kallsyms.h> + +#if defined(__x86_64__) + +struct extra_kernel_map_info { + int cnt; + int max_cnt; + struct extra_kernel_map *maps; + bool get_entry_trampolines; + u64 entry_trampoline; +}; + +static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start, + u64 end, u64 pgoff, const char *name) +{ + if (mi->cnt >= mi->max_cnt) { + void *buf; + size_t sz; + + mi->max_cnt = mi->max_cnt ? 
mi->max_cnt * 2 : 32; + sz = sizeof(struct extra_kernel_map) * mi->max_cnt; + buf = realloc(mi->maps, sz); + if (!buf) + return -1; + mi->maps = buf; + } + + mi->maps[mi->cnt].start = start; + mi->maps[mi->cnt].end = end; + mi->maps[mi->cnt].pgoff = pgoff; + strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN); + + mi->cnt += 1; + + return 0; +} + +static int find_extra_kernel_maps(void *arg, const char *name, char type, + u64 start) +{ + struct extra_kernel_map_info *mi = arg; + + if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL && + !strcmp(name, "_entry_trampoline")) { + mi->entry_trampoline = start; + return 0; + } + + if (is_entry_trampoline(name)) { + u64 end = start + page_size; + + return add_extra_kernel_map(mi, start, end, 0, name); + } + + return 0; +} + +int machine__create_extra_kernel_maps(struct machine *machine, + struct dso *kernel) +{ + struct extra_kernel_map_info mi = { .cnt = 0, }; + char filename[PATH_MAX]; + int ret; + int i; + + machine__get_kallsyms_filename(machine, filename, PATH_MAX); + + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return 0; + + ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps); + if (ret) + goto out_free; + + if (!mi.entry_trampoline) + goto out_free; + + for (i = 0; i < mi.cnt; i++) { + struct extra_kernel_map *xm = &mi.maps[i]; + + xm->pgoff = mi.entry_trampoline; + ret = machine__create_extra_kernel_map(machine, kernel, xm); + if (ret) + goto out_free; + } + + machine->trampolines_mapped = mi.cnt; +out_free: + free(mi.maps); + return ret; +} + +#endif diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c new file mode 100644 index 000000000..efc0fae9e --- /dev/null +++ b/tools/perf/arch/x86/util/mem-events.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/pmu.h" +#include "util/env.h" +#include "map_symbol.h" +#include "mem-events.h" +#include "linux/string.h" +#include "env.h" + +static char mem_loads_name[100]; +static bool mem_loads_name__init; +static char mem_stores_name[100]; + +#define MEM_LOADS_AUX 0x8203 +#define MEM_LOADS_AUX_NAME "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = { + E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"), + E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"), + E(NULL, NULL, NULL), +}; + +static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = { + E(NULL, NULL, NULL), + E(NULL, NULL, NULL), + E("mem-ldst", "ibs_op//", "ibs_op"), +}; + +struct perf_mem_event *perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + if (x86__is_amd_cpu()) + return &perf_mem_events_amd[i]; + + return &perf_mem_events_intel[i]; +} + +bool is_mem_loads_aux_event(struct evsel *leader) +{ + if (perf_pmu__find("cpu")) { + if (!pmu_have_event("cpu", "mem-loads-aux")) + return false; + } else if (perf_pmu__find("cpu_core")) { + if (!pmu_have_event("cpu_core", "mem-loads-aux")) + return false; + } + + return leader->core.attr.config == MEM_LOADS_AUX; +} + +char *perf_mem_events__name(int i, char *pmu_name) +{ + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (!e) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD) { + if (mem_loads_name__init && !pmu_name) + return mem_loads_name; + + if (!pmu_name) { + mem_loads_name__init = true; + pmu_name = (char *)"cpu"; + } + + if (pmu_have_event(pmu_name, 
"mem-loads-aux")) { + scnprintf(mem_loads_name, sizeof(mem_loads_name), + MEM_LOADS_AUX_NAME, pmu_name, pmu_name, + perf_mem_events__loads_ldlat); + } else { + scnprintf(mem_loads_name, sizeof(mem_loads_name), + e->name, pmu_name, + perf_mem_events__loads_ldlat); + } + return mem_loads_name; + } + + if (i == PERF_MEM_EVENTS__STORE) { + if (!pmu_name) + pmu_name = (char *)"cpu"; + + scnprintf(mem_stores_name, sizeof(mem_stores_name), + e->name, pmu_name); + return mem_stores_name; + } + + return (char *)e->name; +} diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c new file mode 100644 index 000000000..0ed177991 --- /dev/null +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <string.h> +#include <regex.h> +#include <linux/kernel.h> +#include <linux/zalloc.h> + +#include "../../../perf-sys.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/pmu.h" +#include "../../../util/pmu-hybrid.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(AX, PERF_REG_X86_AX), + SMPL_REG(BX, PERF_REG_X86_BX), + SMPL_REG(CX, PERF_REG_X86_CX), + SMPL_REG(DX, PERF_REG_X86_DX), + SMPL_REG(SI, PERF_REG_X86_SI), + SMPL_REG(DI, PERF_REG_X86_DI), + SMPL_REG(BP, PERF_REG_X86_BP), + SMPL_REG(SP, PERF_REG_X86_SP), + SMPL_REG(IP, PERF_REG_X86_IP), + SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), + SMPL_REG(CS, PERF_REG_X86_CS), + SMPL_REG(SS, PERF_REG_X86_SS), +#ifdef HAVE_ARCH_X86_64_SUPPORT + SMPL_REG(R8, PERF_REG_X86_R8), + SMPL_REG(R9, PERF_REG_X86_R9), + SMPL_REG(R10, PERF_REG_X86_R10), + SMPL_REG(R11, PERF_REG_X86_R11), + SMPL_REG(R12, PERF_REG_X86_R12), + SMPL_REG(R13, PERF_REG_X86_R13), + SMPL_REG(R14, PERF_REG_X86_R14), + SMPL_REG(R15, PERF_REG_X86_R15), +#endif + SMPL_REG2(XMM0, PERF_REG_X86_XMM0), + SMPL_REG2(XMM1, PERF_REG_X86_XMM1), + SMPL_REG2(XMM2, PERF_REG_X86_XMM2), + SMPL_REG2(XMM3, PERF_REG_X86_XMM3), + SMPL_REG2(XMM4, PERF_REG_X86_XMM4), + SMPL_REG2(XMM5, PERF_REG_X86_XMM5), + SMPL_REG2(XMM6, PERF_REG_X86_XMM6), + SMPL_REG2(XMM7, PERF_REG_X86_XMM7), + SMPL_REG2(XMM8, PERF_REG_X86_XMM8), + SMPL_REG2(XMM9, PERF_REG_X86_XMM9), + SMPL_REG2(XMM10, PERF_REG_X86_XMM10), + SMPL_REG2(XMM11, PERF_REG_X86_XMM11), + SMPL_REG2(XMM12, PERF_REG_X86_XMM12), + SMPL_REG2(XMM13, PERF_REG_X86_XMM13), + SMPL_REG2(XMM14, PERF_REG_X86_XMM14), + SMPL_REG2(XMM15, PERF_REG_X86_XMM15), + SMPL_REG_END +}; + +struct sdt_name_reg { + const char *sdt_name; + const char *uprobe_name; +}; +#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} +#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} + +static const struct sdt_name_reg sdt_reg_tbl[] = { + SDT_NAME_REG(eax, ax), + SDT_NAME_REG(rax, ax), + SDT_NAME_REG(al, ax), + SDT_NAME_REG(ah, ax), + SDT_NAME_REG(ebx, bx), + SDT_NAME_REG(rbx, bx), + SDT_NAME_REG(bl, bx), + SDT_NAME_REG(bh, bx), + SDT_NAME_REG(ecx, cx), + SDT_NAME_REG(rcx, cx), + SDT_NAME_REG(cl, cx), + SDT_NAME_REG(ch, cx), + SDT_NAME_REG(edx, dx), + SDT_NAME_REG(rdx, dx), + SDT_NAME_REG(dl, dx), + SDT_NAME_REG(dh, dx), + SDT_NAME_REG(esi, si), + SDT_NAME_REG(rsi, si), + SDT_NAME_REG(sil, si), + SDT_NAME_REG(edi, di), + SDT_NAME_REG(rdi, di), + SDT_NAME_REG(dil, di), + SDT_NAME_REG(ebp, bp), + SDT_NAME_REG(rbp, bp), + SDT_NAME_REG(bpl, bp), + SDT_NAME_REG(rsp, sp), + SDT_NAME_REG(esp, sp), + SDT_NAME_REG(spl, sp), + + /* rNN registers */ + SDT_NAME_REG(r8b, r8), + SDT_NAME_REG(r8w, r8), + 
SDT_NAME_REG(r8d, r8), + SDT_NAME_REG(r9b, r9), + SDT_NAME_REG(r9w, r9), + SDT_NAME_REG(r9d, r9), + SDT_NAME_REG(r10b, r10), + SDT_NAME_REG(r10w, r10), + SDT_NAME_REG(r10d, r10), + SDT_NAME_REG(r11b, r11), + SDT_NAME_REG(r11w, r11), + SDT_NAME_REG(r11d, r11), + SDT_NAME_REG(r12b, r12), + SDT_NAME_REG(r12w, r12), + SDT_NAME_REG(r12d, r12), + SDT_NAME_REG(r13b, r13), + SDT_NAME_REG(r13w, r13), + SDT_NAME_REG(r13d, r13), + SDT_NAME_REG(r14b, r14), + SDT_NAME_REG(r14w, r14), + SDT_NAME_REG(r14d, r14), + SDT_NAME_REG(r15b, r15), + SDT_NAME_REG(r15w, r15), + SDT_NAME_REG(r15d, r15), + SDT_NAME_REG_END, +}; + +/* + * Perf only supports OP which is in +/-NUM(REG) form. + * Here plus-minus sign, NUM and parenthesis are optional, + * only REG is mandatory. + * + * SDT events also supports indirect addressing mode with a + * symbol as offset, scaled mode and constants in OP. But + * perf does not support them yet. Below are few examples. + * + * OP with scaled mode: + * (%rax,%rsi,8) + * 10(%ras,%rsi,8) + * + * OP with indirect addressing mode: + * check_action(%rip) + * mp_+52(%rip) + * 44+mp_(%rip) + * + * OP with constant values: + * $0 + * $123 + * $-1 + */ +#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$" + +static regex_t sdt_op_regex; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED); + if (ret < 0) { + pr_debug4("Regex compilation error.\n"); + return ret; + } + + initialized = 1; + return 0; +} + +/* + * Max x86 register name length is 5(ex: %r15d). So, 6th char + * should always contain NULL. This helps to find register name + * length using strlen, instead of maintaining one more variable. + */ +#define SDT_REG_NAME_SIZE 6 + +/* + * The uprobe parser does not support all gas register names; + * so, we have to replace them (ex. for x86_64: %rax -> %ax). + * Note: If register does not require renaming, just copy + * paste as it is, but don't leave it empty. + */ +static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg) +{ + int i = 0; + + for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) { + if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) { + strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name); + return; + } + } + + strncpy(uprobe_reg, sdt_reg, sdt_len); +} + +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + char new_reg[SDT_REG_NAME_SIZE] = {0}; + int new_len = 0, ret; + /* + * rm[0]: +/-NUM(REG) + * rm[1]: +/- + * rm[2]: NUM + * rm[3]: ( + * rm[4]: REG + * rm[5]: ) + */ + regmatch_t rm[6]; + /* + * Max prefix length is 2 as it may contains sign(+/-) + * and displacement 0 (Both sign and displacement 0 are + * optional so it may be empty). Use one more character + * to hold last NULL so that strlen can be used to find + * prefix length, instead of maintaining one more variable. + */ + char prefix[3] = {0}; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + /* + * If unsupported OR does not match with regex OR + * register name too long, skip it. + */ + if (strchr(old_op, ',') || strchr(old_op, '$') || + regexec(&sdt_op_regex, old_op, 6, rm, 0) || + rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + /* + * Prepare prefix. + * If SDT OP has parenthesis but does not provide + * displacement, add 0 for displacement. 
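Before the prefix table that follows, here is a quick standalone check of what the groups of SDT_OP_REGEX (copied from the macro above) capture for a typical operand; the operand string is only an example, and plain POSIX regex is all that is assumed:

#include <regex.h>
#include <stdio.h>

#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"

int main(void)
{
	regex_t re;
	regmatch_t rm[6];
	const char *op = "+24(%rdi)";	/* a typical SDT operand */

	if (regcomp(&re, SDT_OP_REGEX, REG_EXTENDED))
		return 1;

	if (!regexec(&re, op, 6, rm, 0)) {
		/* rm[1] = sign, rm[2] = displacement, rm[4] = register */
		printf("sign '%.*s' disp '%.*s' reg '%.*s'\n",
		       (int)(rm[1].rm_eo - rm[1].rm_so), op + rm[1].rm_so,
		       (int)(rm[2].rm_eo - rm[2].rm_so), op + rm[2].rm_so,
		       (int)(rm[4].rm_eo - rm[4].rm_so), op + rm[4].rm_so);
	}
	regfree(&re);
	return 0;
}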
+ * SDT Uprobe Prefix + * ----------------------------- + * +24(%rdi) +24(%di) + + * 24(%rdi) +24(%di) + + * %rdi %di + * (%rdi) +0(%di) +0 + * -80(%rbx) -80(%bx) - + */ + if (rm[3].rm_so != rm[3].rm_eo) { + if (rm[1].rm_so != rm[1].rm_eo) + prefix[0] = *(old_op + rm[1].rm_so); + else if (rm[2].rm_so != rm[2].rm_eo) + prefix[0] = '+'; + else + scnprintf(prefix, sizeof(prefix), "+0"); + } + + /* Rename register */ + sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so, + new_reg); + + /* Prepare final OP which should be valid for uprobe_events */ + new_len = strlen(prefix) + + (rm[2].rm_eo - rm[2].rm_so) + + (rm[3].rm_eo - rm[3].rm_so) + + strlen(new_reg) + + (rm[5].rm_eo - rm[5].rm_so) + + 1; /* NULL */ + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s", + strlen(prefix), prefix, + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, + (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so, + strlen(new_reg), new_reg, + (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so); + + return SDT_ARG_VALID; +} + +uint64_t arch__intr_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_INTR, + .sample_regs_intr = PERF_REG_EXTENDED_MASK, + .precise_ip = 1, + .disabled = 1, + .exclude_kernel = 1, + }; + struct perf_pmu *pmu; + int fd; + /* + * In an unnamed union, init it here to build on older gcc versions + */ + attr.sample_period = 1; + + if (perf_pmu__has_hybrid()) { + /* + * The same register set is supported among different hybrid PMUs. + * Only check the first available one. + */ + pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list); + attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } + + event_attr_init(&attr); + + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK); + } + + return PERF_REGS_MASK; +} diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000..74d69db1e --- /dev/null +++ b/tools/perf/arch/x86/util/pmu.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +#include <fcntl.h> +#include <linux/stddef.h> +#include <linux/perf_event.h> +#include <linux/zalloc.h> +#include <api/fs/fs.h> +#include <errno.h> + +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/pmu.h" +#include "../../../util/fncache.h" + +#define TEMPLATE_ALIAS "%s/bus/event_source/devices/%s/alias" + +struct pmu_alias { + char *name; + char *alias; + struct list_head list; +}; + +static LIST_HEAD(pmu_alias_name_list); +static bool cached_list; + +struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) + return intel_pt_pmu_default_config(pmu); + if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) + pmu->selectable = true; +#endif + return NULL; +} + +static void pmu_alias__delete(struct pmu_alias *pmu_alias) +{ + if (!pmu_alias) + return; + + zfree(&pmu_alias->name); + zfree(&pmu_alias->alias); + free(pmu_alias); +} + +static struct pmu_alias *pmu_alias__new(char *name, char *alias) +{ + struct pmu_alias *pmu_alias = zalloc(sizeof(*pmu_alias)); + + if (pmu_alias) { + pmu_alias->name = strdup(name); + if (!pmu_alias->name) + goto 
out_delete; + + pmu_alias->alias = strdup(alias); + if (!pmu_alias->alias) + goto out_delete; + } + return pmu_alias; + +out_delete: + pmu_alias__delete(pmu_alias); + return NULL; +} + +static int setup_pmu_alias_list(void) +{ + char path[PATH_MAX]; + DIR *dir; + struct dirent *dent; + const char *sysfs = sysfs__mountpoint(); + struct pmu_alias *pmu_alias; + char buf[MAX_PMU_NAME_LEN]; + FILE *file; + int ret = -ENOMEM; + + if (!sysfs) + return -1; + + snprintf(path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + + dir = opendir(path); + if (!dir) + return -errno; + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + snprintf(path, PATH_MAX, + TEMPLATE_ALIAS, sysfs, dent->d_name); + + if (!file_available(path)) + continue; + + file = fopen(path, "r"); + if (!file) + continue; + + if (!fgets(buf, sizeof(buf), file)) { + fclose(file); + continue; + } + + fclose(file); + + /* Remove the last '\n' */ + buf[strlen(buf) - 1] = 0; + + pmu_alias = pmu_alias__new(dent->d_name, buf); + if (!pmu_alias) + goto close_dir; + + list_add_tail(&pmu_alias->list, &pmu_alias_name_list); + } + + ret = 0; + +close_dir: + closedir(dir); + return ret; +} + +static char *__pmu_find_real_name(const char *name) +{ + struct pmu_alias *pmu_alias; + + list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) { + if (!strcmp(name, pmu_alias->alias)) + return pmu_alias->name; + } + + return (char *)name; +} + +char *pmu_find_real_name(const char *name) +{ + if (cached_list) + return __pmu_find_real_name(name); + + setup_pmu_alias_list(); + cached_list = true; + + return __pmu_find_real_name(name); +} + +static char *__pmu_find_alias_name(const char *name) +{ + struct pmu_alias *pmu_alias; + + list_for_each_entry(pmu_alias, &pmu_alias_name_list, list) { + if (!strcmp(name, pmu_alias->name)) + return pmu_alias->alias; + } + return NULL; +} + +char *pmu_find_alias_name(const char *name) +{ + if (cached_list) + return __pmu_find_alias_name(name); + + setup_pmu_alias_list(); + cached_list = true; + + return __pmu_find_alias_name(name); +} diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c new file mode 100644 index 000000000..54810f9ac --- /dev/null +++ b/tools/perf/arch/x86/util/topdown.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include "api/fs/fs.h" +#include "util/pmu.h" +#include "util/topdown.h" +#include "util/evlist.h" +#include "util/debug.h" +#include "util/pmu-hybrid.h" +#include "topdown.h" +#include "evsel.h" + +#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" +#define TOPDOWN_L1_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/}" +#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}" +#define TOPDOWN_L2_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}" + +/* Check whether there is a PMU which supports the perf metrics. 
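In sysfs terms, the probe below reduces to checking whether the core PMU advertises a "slots" event. A rough standalone equivalent (the path assumes a standard sysfs mount; the hybrid "cpu_core" case is ignored):

#include <stdio.h>
#include <unistd.h>

/* Rough equivalent of the check below: the perf metrics (topdown)
 * feature is usable when the core PMU exposes a "slots" event.
 * On hybrid systems "cpu_core" would be probed instead of "cpu". */
int main(void)
{
	const char *path = "/sys/bus/event_source/devices/cpu/events/slots";

	printf("perf metrics %savailable\n", access(path, F_OK) ? "not " : "");
	return 0;
}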
*/ +bool topdown_sys_has_perf_metrics(void) +{ + static bool has_perf_metrics; + static bool cached; + struct perf_pmu *pmu; + + if (cached) + return has_perf_metrics; + + /* + * The perf metrics feature is a core PMU feature. + * The PERF_TYPE_RAW type is the type of a core PMU. + * The slots event is only available when the core PMU + * supports the perf metrics feature. + */ + pmu = perf_pmu__find_by_type(PERF_TYPE_RAW); + if (pmu && pmu_have_event(pmu->name, "slots")) + has_perf_metrics = true; + + cached = true; + return has_perf_metrics; +} + +/* + * Check whether we can use a group for top down. + * Without a group may get bad results due to multiplexing. + */ +bool arch_topdown_check_group(bool *warn) +{ + int n; + + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) + return false; + if (n > 0) { + *warn = true; + return false; + } + return true; +} + +void arch_topdown_group_warn(void) +{ + fprintf(stderr, + "nmi_watchdog enabled with topdown. May give wrong results.\n" + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); +} + +#define TOPDOWN_SLOTS 0x0400 + +/* + * Check whether a topdown group supports sample-read. + * + * Only Topdown metric supports sample-read. The slots + * event must be the leader of the topdown group. + */ + +bool arch_topdown_sample_read(struct evsel *leader) +{ + if (!evsel__sys_has_perf_metrics(leader)) + return false; + + if (leader->core.attr.config == TOPDOWN_SLOTS) + return true; + + return false; +} + +const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn) +{ + const char *pmu_name; + + if (!perf_pmu__has_hybrid()) + return "cpu"; + + if (!evlist->hybrid_pmu_name) { + if (warn) + pr_warning("WARNING: default to use cpu_core topdown events\n"); + evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core"); + } + + pmu_name = evlist->hybrid_pmu_name; + + return pmu_name; +} + +int topdown_parse_events(struct evlist *evlist) +{ + const char *topdown_events; + const char *pmu_name; + + if (!topdown_sys_has_perf_metrics()) + return 0; + + pmu_name = arch_get_topdown_pmu_name(evlist, false); + + if (pmu_have_event(pmu_name, "topdown-heavy-ops")) { + if (!strcmp(pmu_name, "cpu_core")) + topdown_events = TOPDOWN_L2_EVENTS_CORE; + else + topdown_events = TOPDOWN_L2_EVENTS; + } else { + if (!strcmp(pmu_name, "cpu_core")) + topdown_events = TOPDOWN_L1_EVENTS_CORE; + else + topdown_events = TOPDOWN_L1_EVENTS; + } + + return parse_event(evlist, topdown_events); +} diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h new file mode 100644 index 000000000..7eb81f042 --- /dev/null +++ b/tools/perf/arch/x86/util/topdown.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOPDOWN_H +#define _TOPDOWN_H 1 + +bool topdown_sys_has_perf_metrics(void); +int topdown_parse_events(struct evlist *evlist); + +#endif diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c new file mode 100644 index 000000000..eb2b5195b --- /dev/null +++ b/tools/perf/arch/x86/util/tsc.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <math.h> +#include <string.h> + +#include "../../../util/debug.h" +#include "../../../util/tsc.h" +#include "cpuid.h" + +u64 rdtsc(void) +{ + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((u64)high) << 32; +} + +/* + * Derive the TSC frequency in Hz from the /proc/cpuinfo, for example: + * ... + * model name : Intel(R) Xeon(R) Gold 6154 CPU @ 3.00GHz + * ... 
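Where CPUID leaf 0x15 is available, arch_get_tsc_freq() below avoids /proc/cpuinfo entirely and computes the frequency from the enumerated crystal clock: TSC Hz = ECX * EBX / EAX. A tiny worked example with assumed leaf values, not read from real hardware:

#include <stdint.h>
#include <stdio.h>

/* CPUID leaf 0x15 reports the TSC/crystal clock ratio (EBX/EAX) and the
 * crystal frequency in Hz (ECX). The sample values below correspond to a
 * 24 MHz crystal and a 125:1 effective ratio, i.e. a 3 GHz TSC. */
int main(void)
{
	uint32_t eax = 2, ebx = 250, ecx = 24000000;	/* assumed sample values */
	double tsc_hz = (double)ecx * ebx / eax;

	printf("TSC frequency: %.0f Hz\n", tsc_hz);
	return 0;
}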
+ * will return 3000000000. + */ +static double cpuinfo_tsc_freq(void) +{ + double result = 0; + FILE *cpuinfo; + char *line = NULL; + size_t len = 0; + + cpuinfo = fopen("/proc/cpuinfo", "r"); + if (!cpuinfo) { + pr_err("Failed to read /proc/cpuinfo for TSC frequency"); + return NAN; + } + while (getline(&line, &len, cpuinfo) > 0) { + if (!strncmp(line, "model name", 10)) { + char *pos = strstr(line + 11, " @ "); + + if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) { + result *= 1000000000; + goto out; + } + } + } +out: + if (fpclassify(result) == FP_ZERO) + pr_err("Failed to find TSC frequency in /proc/cpuinfo"); + + free(line); + fclose(cpuinfo); + return result; +} + +double arch_get_tsc_freq(void) +{ + unsigned int a, b, c, d, lvl; + static bool cached; + static double tsc; + char vendor[16]; + + if (cached) + return tsc; + + cached = true; + get_cpuid_0(vendor, &lvl); + if (!strstr(vendor, "Intel")) + return 0; + + /* + * Don't support Time Stamp Counter and + * Nominal Core Crystal Clock Information Leaf. + */ + if (lvl < 0x15) { + tsc = cpuinfo_tsc_freq(); + return tsc; + } + + cpuid(0x15, 0, &a, &b, &c, &d); + /* TSC frequency is not enumerated */ + if (!a || !b || !c) { + tsc = cpuinfo_tsc_freq(); + return tsc; + } + + tsc = (double)c * (double)b / (double)a; + return tsc; +} diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c new file mode 100644 index 000000000..eea2bf872 --- /dev/null +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <elfutils/libdwfl.h> +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[17]; + unsigned nregs; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \ + val; \ +}) + + if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) { + dwarf_regs[0] = REG(AX); + dwarf_regs[1] = REG(CX); + dwarf_regs[2] = REG(DX); + dwarf_regs[3] = REG(BX); + dwarf_regs[4] = REG(SP); + dwarf_regs[5] = REG(BP); + dwarf_regs[6] = REG(SI); + dwarf_regs[7] = REG(DI); + dwarf_regs[8] = REG(IP); + nregs = 9; + } else { + dwarf_regs[0] = REG(AX); + dwarf_regs[1] = REG(DX); + dwarf_regs[2] = REG(CX); + dwarf_regs[3] = REG(BX); + dwarf_regs[4] = REG(SI); + dwarf_regs[5] = REG(DI); + dwarf_regs[6] = REG(BP); + dwarf_regs[7] = REG(SP); + dwarf_regs[8] = REG(R8); + dwarf_regs[9] = REG(R9); + dwarf_regs[10] = REG(R10); + dwarf_regs[11] = REG(R11); + dwarf_regs[12] = REG(R12); + dwarf_regs[13] = REG(R13); + dwarf_regs[14] = REG(R14); + dwarf_regs[15] = REG(R15); + dwarf_regs[16] = REG(IP); + nregs = 17; + } + + return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs); +} diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c new file mode 100644 index 000000000..47357973b --- /dev/null +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include "../../util/debug.h" +#ifndef REMOTE_UNWIND_LIBUNWIND +#include <libunwind.h> +#include "perf_regs.h" +#include "../../util/unwind.h" +#endif + +#ifdef HAVE_ARCH_X86_64_SUPPORT +int LIBUNWIND__ARCH_REG_ID(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_64_RAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_64_RDX: + 
id = PERF_REG_X86_DX; + break; + case UNW_X86_64_RCX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_64_RBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_64_RSI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_64_RDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_64_RBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_64_RSP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_64_R8: + id = PERF_REG_X86_R8; + break; + case UNW_X86_64_R9: + id = PERF_REG_X86_R9; + break; + case UNW_X86_64_R10: + id = PERF_REG_X86_R10; + break; + case UNW_X86_64_R11: + id = PERF_REG_X86_R11; + break; + case UNW_X86_64_R12: + id = PERF_REG_X86_R12; + break; + case UNW_X86_64_R13: + id = PERF_REG_X86_R13; + break; + case UNW_X86_64_R14: + id = PERF_REG_X86_R14; + break; + case UNW_X86_64_R15: + id = PERF_REG_X86_R15; + break; + case UNW_X86_64_RIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#else +int LIBUNWIND__ARCH_REG_ID(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_EAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_EDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_ECX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_EBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_ESI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_EDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_EBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_ESP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_EIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#endif /* HAVE_ARCH_X86_64_SUPPORT */ |
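For reference, the dwarf_regs[] ordering used in libdw__arch_set_initial_registers() above follows the DWARF register numbering of each ABI. A tiny self-contained dump of the 64-bit numbering, as an informational sketch only:

#include <stdio.h>

/* DWARF register numbering on x86-64, which is the order the 64-bit
 * branch above fills dwarf_regs[] in; the 32-bit ABI uses a different
 * order (ax, cx, dx, bx, sp, bp, si, di, ip). */
static const char * const dwarf_regs_x86_64[] = {
	"rax", "rdx", "rcx", "rbx", "rsi", "rdi", "rbp", "rsp",
	"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
	"rip",
};

int main(void)
{
	for (unsigned i = 0; i < sizeof(dwarf_regs_x86_64) / sizeof(dwarf_regs_x86_64[0]); i++)
		printf("DWARF reg %2u = %%%s\n", i, dwarf_regs_x86_64[i]);
	return 0;
}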