From 2957e9a7ea070524508a846205689431cb5c101f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 1 Jul 2024 19:13:54 +0200 Subject: Adding upstream version 6.9.7. Signed-off-by: Daniel Baumann --- tools/perf/Documentation/perf-list.txt | 1 + tools/perf/Makefile.perf | 7 +- tools/perf/bench/inject-buildid.c | 2 +- tools/perf/bench/uprobe.c | 2 +- tools/perf/builtin-annotate.c | 2 - tools/perf/builtin-daemon.c | 4 +- tools/perf/builtin-record.c | 14 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-sched.c | 7 +- tools/perf/builtin-script.c | 2 +- .../pmu-events/arch/s390/cf_z16/transaction.json | 28 +-- tools/perf/pmu-events/arch/s390/mapfile.csv | 2 +- tools/perf/tests/builtin-test.c | 37 +--- tools/perf/tests/code-reading.c | 10 +- tools/perf/tests/shell/test_arm_coresight.sh | 2 +- tools/perf/tests/workloads/datasym.c | 16 ++ tools/perf/ui/browser.c | 6 +- tools/perf/ui/browser.h | 2 +- tools/perf/util/annotate.c | 19 +- tools/perf/util/auxtrace.c | 4 +- tools/perf/util/dwarf-aux.c | 202 ++++++++++++++------- tools/perf/util/dwarf-aux.h | 17 ++ .../perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 + tools/perf/util/intel-pt.c | 2 + tools/perf/util/machine.c | 2 +- tools/perf/util/perf_event_attr_fprintf.c | 26 ++- tools/perf/util/pmu.c | 119 +++++++++--- tools/perf/util/pmu.h | 7 +- tools/perf/util/probe-event.c | 1 + tools/perf/util/python.c | 10 + tools/perf/util/stat-display.c | 3 + tools/perf/util/symbol.c | 49 +++-- tools/perf/util/thread.c | 14 +- 33 files changed, 413 insertions(+), 210 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 3b1259519..6bf2468f5 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -71,6 +71,7 @@ counted. The following modifiers exist: D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, e - group or event are exclusive and do not share the PMU + b - use BPF aggregration (see perf stat --bpf-counters) The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 04d89d2ed..d769aa447 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -458,18 +458,19 @@ SHELL = $(SHELL_PATH) arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools ifneq ($(OUTPUT),) - arm64_gen_sysreg_outdir := $(OUTPUT) + arm64_gen_sysreg_outdir := $(abspath $(OUTPUT)) else arm64_gen_sysreg_outdir := $(CURDIR) endif arm64-sysreg-defs: FORCE - $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) + $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \ + prefix= subdir= arm64-sysreg-defs-clean: $(call QUIET_CLEAN,arm64-sysreg-defs) $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \ - clean > /dev/null + prefix= subdir= clean > /dev/null beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/ linux_uapi_dir := $(srctree)/tools/include/uapi/linux diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 49331743c..a759eb232 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -362,7 +362,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) return -1; for (i = 0; i < nr_mmaps; i++) { - int idx = rand() % (nr_dsos - 1); + int idx = rand() % nr_dsos; struct bench_dso *dso = &dsos[idx]; u64 timestamp = rand() % 1000000; diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c index 5c71fdc41..b722ff88f 100644 --- a/tools/perf/bench/uprobe.c +++ b/tools/perf/bench/uprobe.c @@ -47,7 +47,7 @@ static const char * const bench_uprobe_usage[] = { #define bench_uprobe__attach_uprobe(prog) \ skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \ /*pid=*/-1, \ - /*binary_path=*/"/lib64/libc.so.6", \ + /*binary_path=*/"libc.so.6", \ /*func_offset=*/0, \ /*opts=*/&uprobe_opts); \ if (!skel->links.prog) { \ diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6c1cc7976..9cd97fd76 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -809,8 +809,6 @@ int cmd_annotate(int argc, const char **argv) "Enable symbol demangling"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), - OPT_BOOLEAN(0, "group", &symbol_conf.event_group, - "Show event group information together"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 83954af36..de76bbc50 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -523,7 +523,7 @@ static int daemon_session__control(struct daemon_session *session, session->base, SESSION_CONTROL); control = open(control_path, O_WRONLY|O_NONBLOCK); - if (!control) + if (control < 0) return -1; if (do_ack) { @@ -532,7 +532,7 @@ static int daemon_session__control(struct daemon_session *session, session->base, SESSION_ACK); ack = open(ack_path, O_RDONLY, O_NONBLOCK); - if (!ack) { + if (ack < 0) { close(control); return -1; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ff7e1d6cf..45c755fb5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1355,8 +1355,6 @@ static int record__open(struct record *rec) struct record_opts *opts = &rec->opts; int rc = 0; - evlist__config(evlist, opts, &callchain_param); - evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { @@ -1958,8 +1956,7 @@ static void record__read_lost_samples(struct record *rec) if (count.lost) { if (!lost) { - lost = zalloc(sizeof(*lost) + - session->machines.host.id_hdr_size); + lost = zalloc(PERF_SAMPLE_MAX_SIZE); if (!lost) { pr_debug("Memory allocation failed\n"); return; @@ -1975,8 +1972,7 @@ static void record__read_lost_samples(struct record *rec) lost_count = perf_bpf_filter__lost_count(evsel); if (lost_count) { if (!lost) { - lost = zalloc(sizeof(*lost) + - session->machines.host.id_hdr_size); + lost = zalloc(PERF_SAMPLE_MAX_SIZE); if (!lost) { pr_debug("Memory allocation failed\n"); return; @@ -2483,6 +2479,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) evlist__uniquify_name(rec->evlist); + evlist__config(rec->evlist, opts, &callchain_param); + /* Debug message used by test scripts */ pr_debug3("perf record opening and mmapping events\n"); if (record__open(rec) != 0) { @@ -2881,10 +2879,10 @@ out_delete_session: } #endif zstd_fini(&session->zstd_data); - perf_session__delete(session); - if (!opts->no_bpf_event) evlist__stop_sb_thread(rec->sb_evlist); + + perf_session__delete(session); return status; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dcd93ee5f..5b684d2ab 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -428,7 +428,7 @@ static int report__setup_sample_type(struct report *rep) * compatibility, set the bit if it's an old perf data file. */ evlist__for_each_entry(session->evlist, evsel) { - if (strstr(evsel->name, "arm_spe") && + if (strstr(evsel__name(evsel), "arm_spe") && !(sample_type & PERF_SAMPLE_DATA_SRC)) { evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC; sample_type |= PERF_SAMPLE_DATA_SRC; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b248c4335..1bfb22347 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2963,8 +2963,11 @@ static int timehist_check_attr(struct perf_sched *sched, return -1; } - if (sched->show_callchain && !evsel__has_callchain(evsel)) { - pr_info("Samples do not have callchains.\n"); + /* only need to save callchain related to sched_switch event */ + if (sched->show_callchain && + evsel__name_is(evsel, "sched:sched_switch") && + !evsel__has_callchain(evsel)) { + pr_info("Samples of sched_switch event do not have callchains.\n"); sched->show_callchain = 0; symbol_conf.use_callchain = 0; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 37088cc0f..2e7148d66 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3806,7 +3806,7 @@ static int parse_insn_trace(const struct option *opt __maybe_unused, if (ret < 0) return ret; - itrace_parse_synth_opts(opt, "i0ns", 0); + itrace_parse_synth_opts(opt, "i0nse", 0); symbol_conf.nanosecs = true; return 0; } diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json index ec2ff78e2..3ab1d3a66 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json +++ b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json @@ -2,71 +2,71 @@ { "BriefDescription": "Transaction count", "MetricName": "transaction", - "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL if has_event(TX_C_TEND) else 0" }, { "BriefDescription": "Cycles per Instruction", "MetricName": "cpi", - "MetricExpr": "CPU_CYCLES / INSTRUCTIONS" + "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Problem State Instruction Ratio", "MetricName": "prbstate", - "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100" + "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Level One Miss per 100 Instructions", "MetricName": "l1mp", - "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100" + "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Percentage sourced from Level 2 cache", "MetricName": "l2p", - "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ) else 0" }, { "BriefDescription": "Percentage sourced from Level 3 on same chip cache", "MetricName": "l3p", - "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_CHIP_HIT) else 0" }, { "BriefDescription": "Percentage sourced from Level 4 Local cache on same book", "MetricName": "l4lp", - "MetricExpr": "((DCW_REQ_DRAWER_HIT + DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_REQ_DRAWER_HIT + DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_DRAWER_HIT) else 0" }, { "BriefDescription": "Percentage sourced from Level 4 Remote cache on different book", "MetricName": "l4rp", - "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_OFF_DRAWER) else 0" }, { "BriefDescription": "Percentage sourced from memory", "MetricName": "memp", - "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY + ICW_ON_CHIP_MEMORY + ICW_ON_MODULE_MEMORY + ICW_ON_DRAWER_MEMORY + ICW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100" + "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY + ICW_ON_CHIP_MEMORY + ICW_ON_MODULE_MEMORY + ICW_ON_DRAWER_MEMORY + ICW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_ON_CHIP_MEMORY) else 0" }, { "BriefDescription": "Cycles per Instructions from Finite cache/memory", "MetricName": "finite_cpi", - "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS" + "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS if has_event(L1C_TLB2_MISSES) else 0" }, { "BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1", "MetricName": "est_cpi", - "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS)" + "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0" }, { "BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss", "MetricName": "scpl1m", - "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES)" + "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES) if has_event(L1C_TLB2_MISSES) else 0" }, { "BriefDescription": "Estimated TLB CPU percentage of Total CPU", "MetricName": "tlb_percent", - "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100" + "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100 if has_event(CPU_CYCLES) else 0" }, { "BriefDescription": "Estimated Cycles per TLB Miss", "MetricName": "tlb_miss", - "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES))" + "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) if has_event(DTLB2_MISSES) else 0" } ] diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index a918e1af7..b22648d12 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -5,4 +5,4 @@ Family-model,Version,Filename,EventType ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core ^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core -^IBM.393[12].*3\.7.[[:xdigit:]]+$,3,cf_z16,core +^IBM.393[12].*$,3,cf_z16,core diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index d13ee7683..e05b370b1 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -274,11 +274,8 @@ static int finish_test(struct child_test *child_test, int width) struct test_suite *t = child_test->test; int i = child_test->test_num; int subi = child_test->subtest; - int out = child_test->process.out; int err = child_test->process.err; - bool out_done = out <= 0; bool err_done = err <= 0; - struct strbuf out_output = STRBUF_INIT; struct strbuf err_output = STRBUF_INIT; int ret; @@ -290,11 +287,9 @@ static int finish_test(struct child_test *child_test, int width) pr_info("%3d: %-*s:\n", i + 1, width, test_description(t, -1)); /* - * Busy loop reading from the child's stdout and stderr that are set to - * be non-blocking until EOF. + * Busy loop reading from the child's stdout/stderr that are set to be + * non-blocking until EOF. */ - if (!out_done) - fcntl(out, F_SETFL, O_NONBLOCK); if (!err_done) fcntl(err, F_SETFL, O_NONBLOCK); if (verbose > 1) { @@ -303,11 +298,8 @@ static int finish_test(struct child_test *child_test, int width) else pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); } - while (!out_done || !err_done) { - struct pollfd pfds[2] = { - { .fd = out, - .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, - }, + while (!err_done) { + struct pollfd pfds[1] = { { .fd = err, .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, }, @@ -317,21 +309,7 @@ static int finish_test(struct child_test *child_test, int width) /* Poll to avoid excessive spinning, timeout set for 1000ms. */ poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/1000); - if (!out_done && pfds[0].revents) { - errno = 0; - len = read(out, buf, sizeof(buf) - 1); - - if (len <= 0) { - out_done = errno != EAGAIN; - } else { - buf[len] = '\0'; - if (verbose > 1) - fprintf(stdout, "%s", buf); - else - strbuf_addstr(&out_output, buf); - } - } - if (!err_done && pfds[1].revents) { + if (!err_done && pfds[0].revents) { errno = 0; len = read(err, buf, sizeof(buf) - 1); @@ -354,14 +332,10 @@ static int finish_test(struct child_test *child_test, int width) pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); else pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); - fprintf(stdout, "%s", out_output.buf); fprintf(stderr, "%s", err_output.buf); } - strbuf_release(&out_output); strbuf_release(&err_output); print_test_result(t, i, subi, ret, width); - if (out > 0) - close(out); if (err > 0) close(err); return 0; @@ -394,6 +368,7 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes (*child)->process.no_stdout = 1; (*child)->process.no_stderr = 1; } else { + (*child)->process.stdout_to_stderr = 1; (*child)->process.out = -1; (*child)->process.err = -1; } diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 7a3a7bbbe..29d2f3ee4 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -637,11 +637,11 @@ static int do_test_code_reading(bool try_kcore) evlist__config(evlist, &opts, NULL); - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.comm = 1; + evsel->core.attr.disabled = 1; + evsel->core.attr.enable_on_exec = 0; + } ret = evlist__open(evlist); if (ret < 0) { diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 65dd85207..3302ea0b9 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -188,7 +188,7 @@ arm_cs_etm_snapshot_test() { arm_cs_etm_basic_test() { echo "Recording trace with '$*'" - perf record -o ${perfdata} "$@" -- ls > /dev/null 2>&1 + perf record -o ${perfdata} "$@" -m,8M -- ls > /dev/null 2>&1 perf_script_branch_samples ls && perf_report_branch_samples ls && diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c index ddd40bc63..8e08fc75a 100644 --- a/tools/perf/tests/workloads/datasym.c +++ b/tools/perf/tests/workloads/datasym.c @@ -16,6 +16,22 @@ static int datasym(int argc __maybe_unused, const char **argv __maybe_unused) { for (;;) { buf1.data1++; + if (buf1.data1 == 123) { + /* + * Add some 'noise' in the loop to work around errata + * 1694299 on Arm N1. + * + * Bias exists in SPE sampling which can cause the load + * and store instructions to be skipped entirely. This + * comes and goes randomly depending on the offset the + * linker places the datasym loop at in the Perf binary. + * With an extra branch in the middle of the loop that + * isn't always taken, the instruction stream is no + * longer a continuous repeating pattern that interacts + * badly with the bias. + */ + buf1.data1++; + } buf1.data2 += buf1.data1; } return 0; diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 603d11283..19503e838 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -203,7 +203,7 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser) void ui_browser__handle_resize(struct ui_browser *browser) { ui__refresh_dimensions(false); - ui_browser__show(browser, browser->title, ui_helpline__current); + ui_browser__show(browser, browser->title ?: "", ui_helpline__current); ui_browser__refresh(browser); } @@ -287,7 +287,8 @@ int ui_browser__show(struct ui_browser *browser, const char *title, mutex_lock(&ui__lock); __ui_browser__show_title(browser, title); - browser->title = title; + free(browser->title); + browser->title = strdup(title); zfree(&browser->helpline); va_start(ap, helpline); @@ -304,6 +305,7 @@ void ui_browser__hide(struct ui_browser *browser) mutex_lock(&ui__lock); ui_helpline__pop(); zfree(&browser->helpline); + zfree(&browser->title); mutex_unlock(&ui__lock); } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 510ce4554..6e98d5f8f 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -21,7 +21,7 @@ struct ui_browser { u8 extra_title_lines; int current_color; void *priv; - const char *title; + char *title; char *helpline; const char *no_samples_msg; void (*refresh_dimensions)(struct ui_browser *browser); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 50ca92255..79d082155 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -887,10 +887,17 @@ static struct annotated_source *annotated_source__new(void) static __maybe_unused void annotated_source__delete(struct annotated_source *src) { + struct hashmap_entry *cur; + size_t bkt; + if (src == NULL) return; - hashmap__free(src->samples); + if (src->samples) { + hashmap__for_each_entry(src->samples, cur, bkt) + zfree(&cur->pvalue); + hashmap__free(src->samples); + } zfree(&src->histograms); free(src); } @@ -3025,7 +3032,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m annotation__update_column_widths(notes); } -static void annotation__calc_lines(struct annotation *notes, struct map *map, +static void annotation__calc_lines(struct annotation *notes, struct map_symbol *ms, struct rb_root *root) { struct annotation_line *al; @@ -3033,6 +3040,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, list_for_each_entry(al, ¬es->src->source, node) { double percent_max = 0.0; + u64 addr; int i; for (i = 0; i < al->data_nr; i++) { @@ -3048,8 +3056,9 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, - false, true, notes->start + al->offset); + addr = map__rip_2objdump(ms->map, ms->sym->start); + al->path = get_srcline(map__dso(ms->map), addr + al->offset, NULL, + false, true, ms->sym->start + al->offset); insert_source_line(&tmp_root, al); } @@ -3060,7 +3069,7 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root); + annotation__calc_lines(notes, ms, root); } int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 3684e6009..ef314a579 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1466,6 +1466,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, char *endptr; bool period_type_set = false; bool period_set = false; + bool iy = false; synth_opts->set = true; @@ -1484,6 +1485,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, switch (*p++) { case 'i': case 'y': + iy = true; if (p[-1] == 'y') synth_opts->cycles = true; else @@ -1649,7 +1651,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, } } out: - if (synth_opts->instructions || synth_opts->cycles) { + if (iy) { if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 279112606..f93e57e2f 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1136,6 +1136,68 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } +#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT) +static int reg_from_dwarf_op(Dwarf_Op *op) +{ + switch (op->atom) { + case DW_OP_reg0 ... DW_OP_reg31: + return op->atom - DW_OP_reg0; + case DW_OP_breg0 ... DW_OP_breg31: + return op->atom - DW_OP_breg0; + case DW_OP_regx: + case DW_OP_bregx: + return op->number; + default: + break; + } + return -1; +} + +static int offset_from_dwarf_op(Dwarf_Op *op) +{ + switch (op->atom) { + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + return 0; + case DW_OP_breg0 ... DW_OP_breg31: + return op->number; + case DW_OP_bregx: + return op->number2; + default: + break; + } + return -1; +} + +static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) +{ + /* The first op is checked separately */ + ops++; + nops--; + + /* + * It needs to make sure if the location expression matches to the given + * register and offset exactly. Thus it rejects any complex expressions + * and only allows a few of selected operators that doesn't change the + * location. + */ + while (nops) { + switch (ops->atom) { + case DW_OP_stack_value: + case DW_OP_deref_size: + case DW_OP_deref: + case DW_OP_piece: + break; + default: + return false; + } + ops++; + nops--; + } + return true; +} +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ + #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE @@ -1280,7 +1342,7 @@ struct find_var_data { #define DWARF_OP_DIRECT_REGS 32 static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, - u64 addr_offset, u64 addr_type) + u64 addr_offset, u64 addr_type, bool is_pointer) { Dwarf_Die type_die; Dwarf_Word size; @@ -1294,6 +1356,12 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, if (die_get_real_type(die_mem, &type_die) == NULL) return false; + if (is_pointer && dwarf_tag(&type_die) == DW_TAG_pointer_type) { + /* Get the target type of the pointer */ + if (die_get_real_type(&type_die, &type_die) == NULL) + return false; + } + if (dwarf_aggregate_size(&type_die, &size) < 0) return false; @@ -1305,34 +1373,6 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, return true; } -static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) -{ - /* The first op is checked separately */ - ops++; - nops--; - - /* - * It needs to make sure if the location expression matches to the given - * register and offset exactly. Thus it rejects any complex expressions - * and only allows a few of selected operators that doesn't change the - * location. - */ - while (nops) { - switch (ops->atom) { - case DW_OP_stack_value: - case DW_OP_deref_size: - case DW_OP_deref: - case DW_OP_piece: - break; - default: - return false; - } - ops++; - nops--; - } - return true; -} - /* Only checks direct child DIEs in the given scope. */ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) { @@ -1361,31 +1401,38 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) if (data->is_fbreg && ops->atom == DW_OP_fbreg && data->offset >= (int)ops->number && check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->offset, ops->number)) + match_var_offset(die_mem, data, data->offset, ops->number, + /*is_pointer=*/false)) return DIE_FIND_CB_END; /* Only match with a simple case */ if (data->reg < DWARF_OP_DIRECT_REGS) { /* pointer variables saved in a register 0 to 31 */ if (ops->atom == (DW_OP_reg0 + data->reg) && - check_allowed_ops(ops, nops)) + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, 0, + /*is_pointer=*/true)) return DIE_FIND_CB_END; /* Local variables accessed by a register + offset */ if (ops->atom == (DW_OP_breg0 + data->reg) && check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->offset, ops->number)) + match_var_offset(die_mem, data, data->offset, ops->number, + /*is_pointer=*/false)) return DIE_FIND_CB_END; } else { /* pointer variables saved in a register 32 or above */ if (ops->atom == DW_OP_regx && ops->number == data->reg && - check_allowed_ops(ops, nops)) + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, 0, + /*is_pointer=*/true)) return DIE_FIND_CB_END; /* Local variables accessed by a register + offset */ if (ops->atom == DW_OP_bregx && data->reg == ops->number && check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->offset, ops->number2)) + match_var_offset(die_mem, data, data->offset, ops->number2, + /*is_poitner=*/false)) return DIE_FIND_CB_END; } } @@ -1447,7 +1494,8 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) continue; if (check_allowed_ops(ops, nops) && - match_var_offset(die_mem, data, data->addr, ops->number)) + match_var_offset(die_mem, data, data->addr, ops->number, + /*is_pointer=*/false)) return DIE_FIND_CB_END; } return DIE_FIND_CB_SIBLING; @@ -1479,41 +1527,69 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, *offset = data.offset; return result; } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ -#ifdef HAVE_DWARF_CFI_SUPPORT -static int reg_from_dwarf_op(Dwarf_Op *op) +static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg) { - switch (op->atom) { - case DW_OP_reg0 ... DW_OP_reg31: - return op->atom - DW_OP_reg0; - case DW_OP_breg0 ... DW_OP_breg31: - return op->atom - DW_OP_breg0; - case DW_OP_regx: - case DW_OP_bregx: - return op->number; - default: - break; - } - return -1; + struct die_var_type **var_types = arg; + Dwarf_Die type_die; + int tag = dwarf_tag(die_mem); + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + struct die_var_type *vt; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + /* + * Only collect the first location as it can reconstruct the + * remaining state by following the instructions. + * start = 0 means it covers the whole range. + */ + if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0) + return DIE_FIND_CB_SIBLING; + + if (die_get_real_type(die_mem, &type_die) == NULL) + return DIE_FIND_CB_SIBLING; + + vt = malloc(sizeof(*vt)); + if (vt == NULL) + return DIE_FIND_CB_END; + + vt->die_off = dwarf_dieoffset(&type_die); + vt->addr = start; + vt->reg = reg_from_dwarf_op(ops); + vt->offset = offset_from_dwarf_op(ops); + vt->next = *var_types; + *var_types = vt; + + return DIE_FIND_CB_SIBLING; } -static int offset_from_dwarf_op(Dwarf_Op *op) +/** + * die_collect_vars - Save all variables and parameters + * @sc_die: a scope DIE + * @var_types: a pointer to save the resulting list + * + * Save all variables and parameters in the @sc_die and save them to @var_types. + * The @var_types is a singly-linked list containing type and location info. + * Actual type can be retrieved using dwarf_offdie() with 'die_off' later. + * + * Callers should free @var_types. + */ +void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types) { - switch (op->atom) { - case DW_OP_reg0 ... DW_OP_reg31: - case DW_OP_regx: - return 0; - case DW_OP_breg0 ... DW_OP_breg31: - return op->number; - case DW_OP_bregx: - return op->number2; - default: - break; - } - return -1; + Dwarf_Die die_mem; + + die_find_child(sc_die, __die_collect_vars_cb, (void *)var_types, &die_mem); } +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ +#ifdef HAVE_DWARF_CFI_SUPPORT /** * die_get_cfa - Get frame base information * @dwarf: a Dwarf info diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 85dd527ae..efafd3a1f 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -135,6 +135,15 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, /* Get the list of including scopes */ int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); +/* Variable type information */ +struct die_var_type { + struct die_var_type *next; + u64 die_off; + u64 addr; + int reg; + int offset; +}; + #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /* Get byte offset range of given variable DIE */ @@ -150,6 +159,9 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, Dwarf_Addr addr, Dwarf_Die *die_mem, int *offset); +/* Save all variables and parameters in this scope */ +void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); + #else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, @@ -178,6 +190,11 @@ static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unu return NULL; } +static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused, + struct die_var_type **var_types __maybe_unused) +{ +} + #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ #ifdef HAVE_DWARF_CFI_SUPPORT diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index b450178e3..e733f6b1f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1319,6 +1319,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip) bool ret = false; decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.insn_op = INTEL_PT_OP_OTHER; + decoder->state.insn_len = 0; if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) { bool ip = decoder->set_fup_cfe_ip; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f38893e0b..4db9a098f 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -764,6 +764,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, addr_location__init(&al); intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; if (to_ip && *ip == to_ip) goto out_no_cache; @@ -898,6 +899,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) { intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; goto out_no_cache; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 527517db3..07c22f765 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1549,8 +1549,8 @@ static int machine__update_kernel_mmap(struct machine *machine, updated = map__get(orig); machine->vmlinux_map = updated; - machine__set_kernel_mmap(machine, start, end); maps__remove(machine__kernel_maps(machine), orig); + machine__set_kernel_mmap(machine, start, end); err = maps__insert(machine__kernel_maps(machine), updated); map__put(orig); diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 8f04d3b7f..59fbbba79 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -7,6 +7,8 @@ #include #include #include "util/evsel_fprintf.h" +#include "util/pmu.h" +#include "util/pmus.h" #include "trace-event.h" struct bit_names { @@ -75,9 +77,12 @@ static void __p_read_format(char *buf, size_t size, u64 value) } #define ENUM_ID_TO_STR_CASE(x) case x: return (#x); -static const char *stringify_perf_type_id(u64 value) +static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type) { - switch (value) { + if (pmu) + return pmu->name; + + switch (type) { ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT) @@ -175,9 +180,9 @@ do { \ #define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64) #define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64) -static void __p_type_id(char *buf, size_t size, u64 value) +static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value) { - print_id_unsigned(stringify_perf_type_id(value)); + print_id_unsigned(stringify_perf_type_id(pmu, value)); } static void __p_config_hw_id(char *buf, size_t size, u64 value) @@ -217,8 +222,14 @@ static void __p_config_tracepoint_id(char *buf, size_t size, u64 value) } #endif -static void __p_config_id(char *buf, size_t size, u32 type, u64 value) +static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value) { + const char *name = perf_pmu__name_from_config(pmu, value); + + if (name) { + print_id_hex(name); + return; + } switch (type) { case PERF_TYPE_HARDWARE: return __p_config_hw_id(buf, size, value); @@ -246,8 +257,8 @@ static void __p_config_id(char *buf, size_t size, u32 type, u64 value) #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) -#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val) -#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val) +#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val) +#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val) #define PRINT_ATTRn(_n, _f, _p, _a) \ do { \ @@ -262,6 +273,7 @@ do { \ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv) { + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); char buf[BUF_SIZE]; int ret = 0; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f39cbbc1a..cc349d9cb 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -36,6 +36,18 @@ struct perf_pmu perf_pmu__fake = { #define UNIT_MAX_LEN 31 /* max length for event unit name */ +enum event_source { + /* An event loaded from /sys/devices//events. */ + EVENT_SRC_SYSFS, + /* An event loaded from a CPUID matched json file. */ + EVENT_SRC_CPU_JSON, + /* + * An event loaded from a /sys/devices//identifier matched json + * file. + */ + EVENT_SRC_SYS_JSON, +}; + /** * struct perf_pmu_alias - An event either read from sysfs or builtin in * pmu-events.c, created by parsing the pmu-events json files. @@ -425,9 +437,30 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, { struct perf_pmu_alias *alias; - if (load && !pmu->sysfs_aliases_loaded) - pmu_aliases_parse(pmu); + if (load && !pmu->sysfs_aliases_loaded) { + bool has_sysfs_event; + char event_file_name[FILENAME_MAX + 8]; + + /* + * Test if alias/event 'name' exists in the PMU's sysfs/events + * directory. If not skip parsing the sysfs aliases. Sysfs event + * name must be all lower or all upper case. + */ + scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = tolower(event_file_name[i]); + + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + if (!has_sysfs_event) { + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = toupper(event_file_name[i]); + + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + } + if (has_sysfs_event) + pmu_aliases_parse(pmu); + } list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, name)) return alias; @@ -500,7 +533,7 @@ static int update_alias(const struct pmu_event *pe, static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, const char *desc, const char *val, FILE *val_fd, - const struct pmu_event *pe) + const struct pmu_event *pe, enum event_source src) { struct perf_pmu_alias *alias; int ret; @@ -552,25 +585,30 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, } snprintf(alias->unit, sizeof(alias->unit), "%s", unit); } - if (!pe) { - /* Update an event from sysfs with json data. */ - struct update_alias_data data = { - .pmu = pmu, - .alias = alias, - }; - + switch (src) { + default: + case EVENT_SRC_SYSFS: alias->from_sysfs = true; if (pmu->events_table) { + /* Update an event from sysfs with json data. */ + struct update_alias_data data = { + .pmu = pmu, + .alias = alias, + }; if (pmu_events_table__find_event(pmu->events_table, pmu, name, update_alias, &data) == 0) - pmu->loaded_json_aliases++; + pmu->cpu_json_aliases++; } - } - - if (!pe) pmu->sysfs_aliases++; - else - pmu->loaded_json_aliases++; + break; + case EVENT_SRC_CPU_JSON: + pmu->cpu_json_aliases++; + break; + case EVENT_SRC_SYS_JSON: + pmu->sys_json_aliases++; + break; + + } list_add_tail(&alias->list, &pmu->aliases); return 0; } @@ -646,7 +684,8 @@ static int pmu_aliases_parse(struct perf_pmu *pmu) } if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL, - /*val=*/ NULL, file, /*pe=*/ NULL) < 0) + /*val=*/ NULL, file, /*pe=*/ NULL, + EVENT_SRC_SYSFS) < 0) pr_debug("Cannot set up %s\n", name); fclose(file); } @@ -900,7 +939,8 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, { struct perf_pmu *pmu = vdata; - perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, pe); + perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, + pe, EVENT_SRC_CPU_JSON); return 0; } @@ -935,13 +975,14 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, return 0; if (pmu_uncore_alias_match(pe->pmu, pmu->name) && - pmu_uncore_identifier_match(pe->compat, pmu->id)) { + pmu_uncore_identifier_match(pe->compat, pmu->id)) { perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, - pe); + pe, + EVENT_SRC_SYS_JSON); } return 0; @@ -1035,6 +1076,12 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char pmu->max_precise = pmu_max_precise(dirfd, pmu); pmu->alias_name = pmu_find_alias_name(pmu, dirfd); pmu->events_table = perf_pmu__find_events_table(pmu); + /* + * Load the sys json events/aliases when loading the PMU as each event + * may have a different compat regular expression. We therefore can't + * know the number of sys json events/aliases without computing the + * regular expressions for them all. + */ pmu_add_sys_aliases(pmu); list_add_tail(&pmu->list, pmus); @@ -1632,15 +1679,15 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) { size_t nr; - if (!pmu->sysfs_aliases_loaded) - pmu_aliases_parse(pmu); - - nr = pmu->sysfs_aliases; + pmu_aliases_parse(pmu); + nr = pmu->sysfs_aliases + pmu->sys_json_aliases;; if (pmu->cpu_aliases_added) - nr += pmu->loaded_json_aliases; + nr += pmu->cpu_json_aliases; else if (pmu->events_table) - nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->loaded_json_aliases; + nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases; + else + assert(pmu->cpu_json_aliases == 0); return pmu->selectable ? nr + 1 : nr; } @@ -1693,6 +1740,7 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); + pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { size_t buf_used; @@ -2085,3 +2133,22 @@ void perf_pmu__delete(struct perf_pmu *pmu) zfree(&pmu->id); free(pmu); } + +const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) +{ + struct perf_pmu_alias *event; + + if (!pmu) + return NULL; + + pmu_aliases_parse(pmu); + pmu_add_cpu_aliases(pmu); + list_for_each_entry(event, &pmu->aliases, list) { + struct perf_event_attr attr = {.config = 0,}; + int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL); + + if (ret == 0 && config == attr.config) + return event->name; + } + return NULL; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index e35d98520..77c59ebc0 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -123,8 +123,10 @@ struct perf_pmu { const struct pmu_events_table *events_table; /** @sysfs_aliases: Number of sysfs aliases loaded. */ uint32_t sysfs_aliases; - /** @sysfs_aliases: Number of json event aliases loaded. */ - uint32_t loaded_json_aliases; + /** @cpu_json_aliases: Number of json event aliases loaded specific to the CPUID. */ + uint32_t cpu_json_aliases; + /** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */ + uint32_t sys_json_aliases; /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */ bool sysfs_aliases_loaded; /** @@ -273,5 +275,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__find_core_pmu(void); +const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); #endif /* __PMU_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 2a0ad9ecf..5c12459e9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 075c0f79b..0aeb97c11 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -103,6 +103,16 @@ int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char return EOF; } +const char *perf_pmu__name_from_config(struct perf_pmu *pmu __maybe_unused, u64 config __maybe_unused) +{ + return NULL; +} + +struct perf_pmu *perf_pmus__find_by_type(unsigned int type __maybe_unused) +{ + return NULL; +} + int perf_pmus__num_core_pmus(void) { return 1; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bfc1d705f..91d2f7f65 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -1223,6 +1223,9 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { + if (config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) + continue; + os.evsel = counter; perf_stat__print_shadow_stats(config, counter, 0, diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9ebdb8e13..68dbeae8d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1287,7 +1287,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, { struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; - struct map *replacement_map = NULL; + struct map *map_ref, *replacement_map = NULL; struct machine *machine; bool is_64_bit; int err, fd; @@ -1365,6 +1365,24 @@ static int dso__load_kcore(struct dso *dso, struct map *map, if (!replacement_map) replacement_map = list_entry(md.maps.next, struct map_list_node, node)->map; + /* + * Update addresses of vmlinux map. Re-insert it to ensure maps are + * correctly ordered. Do this before using maps__merge_in() for the + * remaining maps so vmlinux gets split if necessary. + */ + map_ref = map__get(map); + maps__remove(kmaps, map_ref); + + map__set_start(map_ref, map__start(replacement_map)); + map__set_end(map_ref, map__end(replacement_map)); + map__set_pgoff(map_ref, map__pgoff(replacement_map)); + map__set_mapping_type(map_ref, map__mapping_type(replacement_map)); + + err = maps__insert(kmaps, map_ref); + map__put(map_ref); + if (err) + goto out_err; + /* Add new maps */ while (!list_empty(&md.maps)) { struct map_list_node *new_node = list_entry(md.maps.next, struct map_list_node, node); @@ -1372,22 +1390,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, list_del_init(&new_node->node); - if (RC_CHK_EQUAL(new_map, replacement_map)) { - struct map *map_ref; - - map__set_start(map, map__start(new_map)); - map__set_end(map, map__end(new_map)); - map__set_pgoff(map, map__pgoff(new_map)); - map__set_mapping_type(map, map__mapping_type(new_map)); - /* Ensure maps are correctly ordered */ - map_ref = map__get(map); - maps__remove(kmaps, map_ref); - err = maps__insert(kmaps, map_ref); - map__put(map_ref); - map__put(new_map); - if (err) - goto out_err; - } else { + /* skip if replacement_map, already inserted above */ + if (!RC_CHK_EQUAL(new_map, replacement_map)) { /* * Merge kcore map into existing maps, * and ensure that current maps (eBPF) @@ -1969,6 +1973,10 @@ out: return ret; } +/* + * Always takes ownership of vmlinux when vmlinux_allocated == true, even if + * it returns an error. + */ int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { @@ -1987,8 +1995,11 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else symtab_type = DSO_BINARY_TYPE__VMLINUX; - if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) + if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) { + if (vmlinux_allocated) + free((char *) vmlinux); return -1; + } /* * dso__load_sym() may copy 'dso' which will result in the copies having @@ -2031,7 +2042,6 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map) err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) goto out; - free(filename); } out: return err; @@ -2183,7 +2193,6 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) return err; - free(filename); } if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 1aa8962dc..515726489 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -39,12 +39,13 @@ int thread__init_maps(struct thread *thread, struct machine *machine) struct thread *thread__new(pid_t pid, pid_t tid) { - char *comm_str; - struct comm *comm; RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread)); struct thread *thread; if (ADD_RC_CHK(thread, _thread) != NULL) { + struct comm *comm; + char comm_str[32]; + thread__set_pid(thread, pid); thread__set_tid(thread, tid); thread__set_ppid(thread, -1); @@ -56,13 +57,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) init_rwsem(thread__namespaces_lock(thread)); init_rwsem(thread__comm_lock(thread)); - comm_str = malloc(32); - if (!comm_str) - goto err_thread; - - snprintf(comm_str, 32, ":%d", tid); + snprintf(comm_str, sizeof(comm_str), ":%d", tid); comm = comm__new(comm_str, 0, false); - free(comm_str); if (!comm) goto err_thread; @@ -76,7 +72,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) return thread; err_thread: - free(thread); + thread__delete(thread); return NULL; } -- cgit v1.2.3